* Remove old docs

This commit is contained in:
Matthew Honnibal 2015-08-22 22:06:30 +02:00
parent cad0cca4e3
commit 890d6aa216
21 changed files with 0 additions and 3837 deletions


@@ -1,661 +0,0 @@
mixin declare_class(name)
details
summary
span.declaration
span.label class
code #{name}
block
mixin method(name, parameters)
details(open=attributes.open)
summary
span.declaration
span.label #{name}
span.parameters
| self, #{parameters}
block
mixin params
ul
block
mixin param(name, type, value)
li
if type
<strong>#{name}</strong> (!{type}) &#8211;
else
<strong>#{name}</strong> &#8211;
block
mixin attribute(name, type, value)
details(open=attributes.open)
summary
span.declaration
span.label #{name}
block
mixin returns(name, type, value)
li
if type
<strong>#{name}</strong> (!{type}) &#8211;
else
<strong>#{name}</strong> &#8211;
block
mixin returns(type)
| tmp
mixin init
details
summary: h4 Init
block
mixin callable
details
summary: h4 Callable
block
mixin sequence
details
summary: h4 Sequence
block
mixin maptype
details
summary: h4 Map
block
mixin summary
block
mixin en_example
pre.language-python
code
| from spacy.en import English
| from spacy._doc_examples import download_war_and_peace
|
| unprocessed_unicode = download_war_and_peace()
|
| nlp = English()
| doc = nlp(unprocessed_unicode)
+declare_class("English")
p Load models into a callable object to process English text.
+summary
+en_example
+init
p
| Load the resources. Loading takes 20 seconds, and the instance
| consumes 2 to 3 gigabytes of memory.
p
| Intended use is for one instance to be created per process.
| You can create more if you're doing something unusual.
p
| You may wish to make the instance a global variable or "singleton".
| We usually instantiate the object in the <code>main()</code>
| function and pass it around as an explicit argument.
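p
| For example, a minimal sketch of that pattern (the <code>process()</code>
| helper here is illustrative, not part of the library):
pre.language-python
code
| from spacy.en import English
|
| def process(nlp, text):
|     return [token.orth_ for token in nlp(text)]
|
| def main():
|     nlp = English()  # the slow, memory-hungry step; do it once per process
|     print(process(nlp, u'Load the pipeline once and pass it around.'))
|
| if __name__ == '__main__':
|     main()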
+method("__init__", "data_dir=True, Tagger=True, Parser=True, Entity=True, Matcher=True, Packer=None, load_vectors=True")(open="true")
+params
+param("data_dir")
| The data directory. May be #{None}, to disable any data loading
| (including the vocabulary).
+param("Tokenizer")
| A class/function that creates the tokenizer.
+param("Tagger")
| A class/function that creates the part-of-speech tagger.
+param("Parser")
| A class/function that creates the dependency parser.
+param("Entity")
| A class/function that creates the named entity recogniser.
+param("load_vectors")
| A boolean value to control whether the word vectors are loaded.
+callable
+method("__call__", "text, tag=True, parse=True, entity=True")
+params
+param("text", types.unicode)
| The text to be processed. No pre-processing needs to be applied,
| and any length of text can be submitted. Usually you will submit
| a whole document. Text may be zero-length. An exception is raised
| if byte strings are supplied.
+param("tag", types.bool)
| Whether to apply the part-of-speech tagger. Required for parsing
| and entity recognition.
+param("parse", types.bool)
| Whether to apply the syntactic dependency parser.
+param("entity", types.bool)
| Whether to apply the named entity recognizer.
pre.language-python
code
| from spacy.en import English
| nlp = English()
| doc = nlp(u'Some text.') # Applies tagger, parser, entity
| doc = nlp(u'Some text.', parse=False) # Applies tagger and entity, not parser
| doc = nlp(u'Some text.', entity=False) # Applies tagger and parser, not entity
| doc = nlp(u'Some text.', tag=False) # Does not apply tagger, entity or parser
| doc = nlp(u'') # Zero-length tokens, not an error
| # doc = nlp(b'Some text') <-- Error: need unicode
| doc = nlp(b'Some text'.decode('utf8')) # Decode to unicode first.
+declare_class("Doc")
p I'm a doc
+init
+method("__init__", "vocab")
+params
+param("vocab", vocab_type)
| A vocabulary object
+sequence
+method("__getitem__", "i", types.int)
+returns(types.Token)
+method("__getitem__", "start_end", types.slice)
+returns(types.Span)
+method("__iter__")
| Iterate over tokens
+method("__len__")
| Number of tokens in the document.
details
summary: h4 Spans
+attribute("sents", types.generator)
| Iterate over sentences in the document.
+attribute("ents", types.generator)
| Iterate over named entities in the document.
+attribute("noun_chunks", types.generator)
details
summary: h4 Export/Import
+method("to_array", "attr_ids")
| Given a list of M attribute IDs, export the tokens to a numpy ndarray
| of shape N*M, where N is the length of the sentence.
+params
+param("attr_ids", "list[int]")
| A list of attribute ID ints.
+returns("feat_array")
| A feature matrix, with one row per word, and one column per attribute
| indicated in the input attr_ids.
+method("count_by", "attr_id")
| Produce a dict of {attribute (int): count (ints)} frequencies, keyed
| by the values of the given attribute ID.
pre.language-python
code
| >>> from spacy.en import English, attrs
| >>> nlp = English()
| >>> tokens = nlp(u'apple apple orange banana')
| >>> tokens.count_by(attrs.ORTH)
| {12800L: 1, 11880L: 2, 7561L: 1}
| >>> tokens.to_array([attrs.ORTH])
| array([[11880],
| [11880],
| [7561],
| [12800]])
+method("from_array", "attrs, array")
| Load from array
+method("from_bytes")
| Deserialize, loading from bytes
+method("read_bytes")
| classmethod
//+method("merge", "int start_idx, int end_idx, unicode tag, unicode lemma, unicode ent_type")
// | Merge a multi-word expression into a single token. Currently
// | experimental; API is likely to change.
+declare_class("Token")
+init
+method("__init__", "vocab, doc, offset")
+params
+param("vocab", types.Vocab)
p A Vocab object
+param("doc", types.Doc)
p The parent sequence
+param("offset", types.int)
p The index of the token within the document
details
summary: h4 String Views
+attribute("orth / orth_")
| The form of the word with no string normalization or processing, as
| it appears in the string, without trailing whitespace.
+attribute("lemma / lemma_")
| The "base" of the word, with no inflectional suffixes, e.g. the lemma of
| "developing" is "develop", the lemma of "geese" is "goose", etc. Note that
| <em>derivational</em> suffixes are not stripped, e.g. the lemma of
| "instutitions" is "institution", not "institute". Lemmatization is
| performed using the WordNet data, but extended to also cover closed-class
| words such as pronouns. By default, the WN lemmatizer returns "hi"
| as the lemma of "his". We assign pronouns the lemma -PRON-.
+attribute("lower / lower_")
| The form of the word, but forced to lower-case, i.e.
pre.language-python: code lower = word.orth\_.lower()
//+attribute("norm / norm_")
// | The form of the word, after language-specific normalizations has been
// | applied.
+attribute("shape / shape_")
| A transform of the word's string, to show orthographic features.
| The characters a-z are mapped to x, A-Z is mapped to X, 0-9 is mapped
| to d. After these mappings, sequences of 4 or more of the same character
| are truncated to length 4. Examples: C3Po --> XdXx, favorite --> xxxx,
| :) --> :)
+attribute("prefix / prefix_")
| A length-N substring from the start of the word. Length may vary by
| language; currently for English n=1, i.e.
pre.language-python: code prefix = word.orth\_[:1]
+attribute("suffix / suffix_")
| A length-N substring from the end of the word. Length may vary by
| language; currently for English n=3, i.e.
pre.language-python: code suffix = word.orth\_[-3:]
//+attribute("lex_id")
// | lex_id
details
summary: h4 Alignment and Output
+attribute("idx")
p Start index of the token in the string
+method("__len__", "")
p Length of the token's orth string, in unicode code-points.
+method("__unicode__", "")
p Same as token.orth_
+method("__str__", "")
p Varies between Python 2 and Python 3
+attribute("string")
p
| The form of the word as it appears in the string, <strong>including
| trailing whitespace</strong>. This is useful when you need to use
| linguistic features to add inline mark-up to the string.
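p
| For instance, a rough sketch that rebuilds the text with mark-up around
| verbs (assumes a processed <code>doc</code> as above):
pre.language-python
code
| pieces = []
| for token in doc:
|     text = token.string  # includes any trailing whitespace
|     pieces.append(u'<b>%s</b>' % text if token.pos_ == 'VERB' else text)
| markup = u''.join(pieces)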
+method("nbor, i=1")
+params
+param("i")
p Offset relative to token
details
summary: h4 Distributional Features
+attribute("repvec")
p
| A "word embedding" representation: a dense real-valued vector that supports
| similarity queries between words. By default, spaCy currently loads
| vectors produced by the Levy and Goldberg (2014) dependency-based word2vec
| model.
+attribute("cluster")
p
| The Brown cluster ID of the word. These are often useful features for
| linear models. If you're using a non-linear model, particularly a
| neural net or random forest, consider using the real-valued word
| representation vector, in Token.repvec, instead.
+attribute("prob")
p
| The unigram log-probability of the word, estimated from counts from a
| large corpus, smoothed using Simple Good Turing estimation.
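p
| A similarity query can be sketched with a cosine over <code>repvec</code>
| (plain numpy here, not a library call; assumes vectors were loaded):
pre.language-python
code
| import numpy
|
| def cosine(a, b):
|     return numpy.dot(a, b) / (numpy.linalg.norm(a) * numpy.linalg.norm(b))
|
| apples, oranges = nlp(u'apples oranges')
| print(cosine(apples.repvec, oranges.repvec))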
details
summary: h4 Syntactic Tags
+attribute("pos / pos_")
p
| A part-of-speech tag, from the Google Universal Tag Set, e.g.
| <code>NOUN</code>, <code>VERB</code>, <code>ADV</code>. Constants for
| the 17 tag values are provided in <code>spacy.parts_of_speech</code>.
+attribute("tag / tag_")
p
| A morphosyntactic tag, e.g. <code>NN</code>, <code>VBZ</code>,
| <code>DT</code>, etc. These tags are language/corpus specific, and
| typically describe part-of-speech and some amount of morphological
| information. For instance, in the Penn Treebank tag set, <code>VBZ</code>
| is assigned to a present-tense singular verb.
+attribute("dep / dep_")
p
| The type of syntactic dependency relation between the word and its
| syntactic head.
details
summary: h4 Navigating the Parse Tree
+attribute("head")
p
| The Token that is the immediate syntactic head of the word. If the
| word is the root of the dependency tree, the same word is returned.
+attribute("lefts")
p
| An iterator for the immediate leftward syntactic children of the
| word.
+attribute("rights")
p
| An iterator for the immediate rightward syntactic children of the
| word.
+attribute("n_lefts")
p
| The number of immediate syntactic children preceding the word in
| the string.
+attribute("n_rights")
p
| The number of immediate syntactic children following the word in
| the string.
+attribute("children")
p
| An iterator that yields from lefts, and then yields from rights.
+attribute("subtree")
p
| An iterator for the part of the sentence syntactically governed by
| the word, including the word itself.
+attribute("left_edge")
p The leftmost edge of the token's subtree
+attribute("right_edge")
p The rightmost edge of the token's subtree
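p
| A small sketch of walking the tree with these attributes (assumes a parsed
| <code>doc</code>):
pre.language-python
code
| for token in doc:
|     print('%s %s %s %s' % (token.orth_, token.dep_, token.head.orth_,
|                            [w.orth_ for w in token.subtree]))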
details
summary: h4 Named Entities
+attribute("ent_type")
p If the token is part of an entity, its entity type.
+attribute("ent_iob")
p The IOB (inside, outside, begin) entity recognition tag for the token.
details
summary: h4 Lexeme Flags
+method("check_flag", "flag_id")
+params
+param("flag_id")
| flag ID
+attribute("is_oov")
+attribute("is_alpha")
+attribute("is_ascii")
+attribute("is_digit")
+attribute("is_lower")
+attribute("is_title")
+attribute("is_punct")
+attribute("is_space")
+attribute("like_url")
+attribute("like_num")
+attribute("like_email")
//+attribute("conjuncts")
// | Conjuncts
+declare_class("Span")
+init
+method("__init__")
Temp
<code>span = doc[0:4]</code>
+sequence
+method("__getitem__")
p Get item
+method("__iter__")
p Iter
+method("__len__")
p Len
details
summary: h4 Parse
+attribute("root")
p Syntactic head
+attribute("lefts")
p Tokens that are:
ol
li To the left of the span;
li Syntactic children of words within the span
p i.e.
pre.language-python
code
| lefts = [span.doc[i] for i in range(0, span.start)
| if span.doc[i].head in span]
+attribute("rights")
p Tokens that are:
ol
li To the right of the span;
li Syntactic children of words within the span
p i.e.
pre.language-python
code
| rights = [span.doc[i] for i in range(span.end, len(span.doc))
| if span.doc[i].head in span]
+attribute("subtree")
p String
details
summary: h4 String Views
+attribute("string")
p String
+attribute("lemma / lemma_")
p String
+attribute("label / label_")
p String
+declare_class("Lexeme")
p
| The Lexeme object represents a lexical type, stored in the vocabulary
| &ndash; as opposed to a token, occurring in a document.
p
| Lexemes store various features, so that these features can be computed
| once per type, rather than once per token. As job sizes grow, this
| can amount to a substantial efficiency improvement.
p
| All Lexeme attributes are therefore context independent, as a single
| lexeme is reused for all usages of that word. Lexemes are keyed by
| the “orth” attribute.
p
| All Lexeme attributes are accessible directly on the Token object.
+init
+method("__init__")
p Init
details
summary: h4 String Features
+attribute("orth / orth_")
p
| The form of the word with no string normalization or processing,
| as it appears in the string, without trailing whitespace.
+attribute("lower / lower_")
p Tmp
+attribute("norm / norm_")
p Tmp
+attribute("shape / shape_")
p Tmp
+attribute("prefix / prefix_")
p Tmp
+attribute("suffix / suffix_")
p TMP
+declare_class("Vocab", "data_dir=None, lex_props_getter=None")
+sequence
+method("__len__")
+returns
p Number of words in the vocabulary.
+method("__iter__")
+returns
p Lexeme
+maptype
+method("__getitem__", "key_int")
+params
+param("key")
p Integer ID
+returns: p A Lexeme object
+method("__getitem__", "key_str")
+params
+param("key_str", types.unicode)
p A string in the vocabulary
+returns("Lexeme")
+method("__setitem__", "orth_str", "props")
+params
+param("orth_str", types.unicode)
p The orth key
+param("props", types.dict)
p A props dictionary
+returns("None")
details
summary: h4 Import/Export
+method("dump", "loc")
+params
+param("loc", types.unicode)
p Path where the vocabulary should be saved
+method("load_lexemes", "loc")
+params
+param("loc", types.unicode)
p Path to load the lexemes.bin file from
+method("load_vectors", "loc")
+params
+param("loc", types.unicode)
p Path to load the vectors.bin from
+declare_class("StringStore")
+init
Tmp
+sequence
+method("__len__")
+returns("int")
p Number of strings in the string-store
+method("__iter__")
+returns
p Lexeme
+maptype
+method("__getitem__", "key_int")
+params
+param("key_int")
p An integer key
+returns(types.unicode)
p The string that the integer key maps to
+method("__getitem__", "key_unicode")
+params
+param("key_unicode")
p A key, as a unicode string
+returns(types.int)
p The integer ID of the string.
+method("__getitem__", "key_utf8_bytes")
+params
+param("key_utf8_bytes", types.bytes)
p A key, as a UTF-8 encoded byte-string
+returns(types.int)
p The integer ID of the string.
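p
| A round-trip sketch (assuming the store is reachable as
| <code>nlp.vocab.strings</code>):
pre.language-python
code
| strings = nlp.vocab.strings
| orth_id = strings[u'apples']          # unicode key gives an integer ID
| assert strings[orth_id] == u'apples'  # integer key gives the string back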
details
summary: h4 Import/Export
+method("dump", "loc")
+params
+param("loc")
p File path to save the strings.txt to.
+method("load")
+params
+param("loc")
p File path to load the strings.txt from.


@@ -1,95 +0,0 @@
mixin Teaser(title, url, date_long, date_short, author, lede)
article.post
header
h2
a(href=url)= title
.subhead
| by
a(href='#', rel='author')= author
| on
time(datetime=date_short)= date_long
p!= lede
&nbsp;
a.readmore(href='#') ►
doctype html
html(lang='en')
head
meta(charset='utf-8')
title spaCy Blog
meta(name='description', content='')
meta(name='author', content='Matthew Honnibal')
link(rel='stylesheet', href='css/style.css')
//if lt IE 9
script(src='http://html5shiv.googlecode.com/svn/trunk/html5.js')
body#blog
header(role='banner')
h1.logo spaCy Blog
.slogan Blog
nav(role="navigation")
ul
li: a(href="home.html") Home
li: a(href="docs.html") Docs
li.active: a(href="blog.html") Blog
li: a(href="license.html") License
main#content(role='main')
section.intro.profile
p
img(src='img/matt.png')
| Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore.
span.social
a(href='#') Follow me on Twitter
nav(role='navigation')
ul
li
a.button(href='#') Blog
li
a.button(href='#tutorials') Tutorials
section.blogs
+Teaser(
"Introducing spaCy",
"blog_intro.html",
"February 2015",
"2015-02-18",
"Matthew Honnibal",
"<strong>spaCy</strong> is a new library for text processing in Python " +
"and Cython. I wrote it because I think small companies are terrible at " +
"natural language processing (NLP). Or rather: small companies are using " +
"terrible NLP technology."
)
+Teaser(
"Parsing English with 500 lines of Python",
"blog_parser.html",
"December 18, 2013",
"2013-12-18",
"Matthew Hannibal",
"The Natural Language Processing (NLP) community has made big progress" +
"in syntactic parsing over the last few years. Its now possible for a" +
"tiny Python implementation to perform better than the widely-used Stanford " +
"PCFG parser.")
+Teaser(
"A good Part-of-Speech tagger in about 200 lines of Python",
"blog_tagger.html",
"October 11, 2013",
"2013-09-11",
"Matthew Honnibal",
"There are a tonne of “best known techniques” for POS tagging, and you " +
"should ignore the others and just use greedy Averaged Perceptron."
)
section.intro
h2
a.permalink(href='#tutorials', name='tutorials') Tutorials
section.tutorials
include ./tutorials.jade
footer(role="contentinfo")
span.slogan.copyright &copy; 2015 Syllogism Co.
script(src='js/prism.js')


@@ -1,81 +0,0 @@
extends ./template_post.jade
-
var urls = {
'pos_post': 'https://honnibal.wordpress.com/2013/09/11/a-good-part-of-speechpos-tagger-in-about-200-lines-of-python/',
'google_ngrams': "http://googleresearch.blogspot.com.au/2013/05/syntactic-ngrams-over-time.html",
'implementation': 'https://gist.github.com/syllog1sm/10343947',
'redshift': 'http://github.com/syllog1sm/redshift',
'tasker': 'https://play.google.com/store/apps/details?id=net.dinglisch.android.taskerm',
'acl_anthology': 'http://aclweb.org/anthology/',
'share_twitter': 'http://twitter.com/share?text=[ARTICLE HEADLINE]&url=[ARTICLE LINK]&via=honnibal'
}
- var my_research_software = '<a href="https://github.com/syllog1sm/redshift/tree/develop">my research software</a>'
- var how_to_write_a_POS_tagger = '<a href="https://honnibal.wordpress.com/2013/09/11/a-good-part-of-speechpos-tagger-in-about-200-lines-of-python/">how to write a part-of-speech tagger</a>'
- var parser_lnk = '<a href="https://honnibal.wordpress.com/2013/12/18/a-simple-fast-algorithm-for-natural-language-dependency-parsing/">parser</a>'
- var buy_a_commercial_license = '<a href="license.html">buy a commercial license</a>'
block body_block
article.post
p.
<strong>spaCy</strong> is a new library for text processing in Python
and Cython. I wrote it because I think small companies are terrible at
natural language processing (NLP). Or rather: small companies are using
terrible NLP technology.
p.
To do great NLP, you have to know a little about linguistics, a lot
about machine learning, and almost everything about the latest research.
The people who fit this description seldom join small companies.
Most are broke &ndash; they've just finished grad school.
If they don't want to stay in academia, they join Google, IBM, etc.
p.
The net result is that outside of the tech giants, commercial NLP has
changed little in the last ten years. In academia, it's changed entirely.
Amazing improvements in quality. Orders of magnitude faster. But the
academic code is always GPL, undocumented, unusable, or all three.
You could implement the ideas yourself, but the papers are hard to read,
and training data is exorbitantly expensive. So what are you left with?
A common answer is NLTK, which was written primarily as an educational resource.
Nothing past the tokenizer is suitable for production use.
p.
I used to think that the NLP community just needed to do more to communicate
its findings to software engineers. So I wrote two blog posts, explaining
!{how_to_write_a_POS_tagger} and !{parser_lnk}. Both were well
received, and there's been a bit of interest in !{my_research_software}
&ndash; even though it's entirely undocumented, and mostly unusable to
anyone but me.
p.
So six months ago I quit my post-doc, and I've been working day and night
on spaCy since. I'm now pleased to announce an alpha release.
p.
If you're a small company doing NLP, I think spaCy will seem like a minor
miracle. It's by far the fastest NLP software ever released. The
full processing pipeline completes in 20ms per document, including accurate
tagging and parsing. All strings are mapped to integer IDs, tokens are
linked to embedded word representations, and a range of useful features
are pre-calculated and cached.
p.
If none of that made any sense to you, here's the gist of it. Computers
don't understand text. This is unfortunate, because that's what the
web almost entirely consists of. We want to recommend people text based
on other text they liked. We want to shorten text to display it on a
mobile screen. We want to aggregate it, link it, filter it, categorise
it, generate it and correct it.
p.
spaCy provides a library of utility functions that help programmers
build such products. It's commercial open source software: you can
either use it under the AGPL, or you can !{buy_a_commercial_license}
under generous terms.
footer(role='contentinfo')


@@ -1,938 +0,0 @@
extends ./template_post.jade
block body_block
- var urls = {}
//- urls.pos_post = 'https://honnibal.wordpress.com/2013/09/11/a-good-part-of-speechpos-tagger-in-about-200-lines-of-python/'
- urls.parser_post = "http://googleresearch.blogspot.com.au/2013/05/syntactic-ngrams-over-time.html"
- urls.implementation = 'https://gist.github.com/syllog1sm/10343947'
- urls.redshift = 'http://github.com/syllog1sm/redshift'
- urls.tasker = 'https://play.google.com/store/apps/details?id=net.dinglisch.android.taskerm'
- urls.acl_anthology = 'http://aclweb.org/anthology/'
- urls.share_twitter = "http://twitter.com/share?text=[ARTICLE HEADLINE]&url=[ARTICLE LINK]&via=honnibal"
// A comment
article.post
header
h2 Parsing English in 500 lines of Python
.subhead
| by
a(href='#', rel='author') Matthew Honnibal
| on
time(datetime='2013-12-18') December 18, 2013
p
| A
a(href=urls.parser_post) syntactic parser
| describes a sentence's grammatical structure, to help another
| application reason about it. Natural languages introduce many unexpected
| ambiguities, which our world-knowledge immediately filters out. A
| favourite example:
p.example They ate the pizza with anchovies
p
img(src='img/blog01.png', alt='Eat-with pizza-with ambiguity')
p
| A correct parse links “with” to “pizza”, while an incorrect parse
| links “with” to “eat”:
.displacy
iframe(src='displacy/anchovies_bad.html', height='275')
.displacy
iframe.displacy(src='displacy/anchovies_good.html', height='275')
a.view-displacy(href='#') View on displaCy
p.caption
| The Natural Language Processing (NLP) community has made big progress
| in syntactic parsing over the last few years.
p
| The Natural Language Processing (NLP) community has made big progress
| in syntactic parsing over the last few years. It's now possible for
| a tiny Python implementation to perform better than the widely-used
| Stanford PCFG parser.
p
strong Update!
| The Stanford CoreNLP library now includes a greedy transition-based
| dependency parser, similar to the one described in this post, but with
| an improved learning strategy. It is much faster and more accurate
| than this simple Python implementation.
table
thead
tr
th Parser
th Accuracy
th Speed (w/s)
th Language
th LOC
tbody
tr
td Stanford
td 89.6%
td 19
td Java
td
| > 4,000
sup
a(href='#note-1') [1]
tr
td
strong parser.py
td 89.8%
td 2,020
td Python
td: strong ~500
tr
td Redshift
td
strong 93.6%
td
strong 2,580
td Cython
td ~4,000
p
| The rest of the post sets up the problem, and then takes you through
a(href=urls.implementation) a concise implementation
| , prepared for this post. The first 200 lines of parser.py, the
| part-of-speech tagger and learner, are described
a(href=pos_tagger_url) here
| . You should probably at least skim that
| post before reading this one, unless you're very familiar with NLP
| research.
p
| The Cython system, Redshift, was written for my current research. I
| plan to improve it for general use in June, after my contract ends
| at Macquarie University. The current version is
a(href=urls.redshift) hosted on GitHub
| .
h3 Problem Description
p It'd be nice to type an instruction like this into your phone:
p.example
| Set volume to zero when I'm in a meeting, unless John's school calls.
p
| And have it set the appropriate policy. On Android you can do this
| sort of thing with
a(href=urls.tasker) Tasker
| , but an NL interface would be much better. It'd be especially nice
| to receive a meaning representation you could edit, so you could see
| what it thinks you said, and correct it.
p
| There are lots of problems to solve to make that work, but some sort
| of syntactic representation is definitely necessary. We need to know that:
p.example
| Unless John's school calls, when I'm in a meeting, set volume to zero
p is another way of phrasing the first instruction, while:
p.example
| Unless John's school, call when I'm in a meeting
p means something completely different.
p
| A dependency parser returns a graph of word-word relationships,
| intended to make such reasoning easier. Our graphs will be trees &ndash;
| edges will be directed, and every node (word) will have exactly one
| incoming arc (one dependency, with its head), except one.
h4 Example usage
pre.language-python
code
| parser = parser.Parser()
| tokens = "Set the volume to zero when I 'm in a meeting unless John 's school calls".split()
| >>> tags, heads = parser.parse(tokens)
| >>> heads
| [-1, 2, 0, 0, 3, 0, 7, 5, 7, 10, 8, 0, 13, 15, 15, 11]
| >>> for i, h in enumerate(heads):
| ... head = tokens[h] if h &gt;= 0 else 'None'
| ... print(tokens[i] + ' &lt;-- ' + head)
| Set <-- None
| the <-- volume
| volume <-- Set
| to <-- Set
| zero <-- to
| when <-- Set
| I <-- 'm
| 'm <-- when
| in <-- 'm
| a <-- meeting
| meeting <-- in
| unless <-- Set
| John <-- 's
| 's <-- calls
| school <-- calls
| calls <-- unless
p.
The idea is that it should be slightly easier to reason from the parse
than from the string. The parse-to-meaning mapping is hopefully
simpler than the string-to-meaning mapping.
p.
The most confusing thing about this problem area is that “correctness”
is defined by convention — by annotation guidelines. If you haven't
read the guidelines and you're not a linguist, you can't tell whether
the parse is “wrong” or “right”, which makes the whole task feel weird
and artificial.
p.
For instance, there's a mistake in the parse above: “John's school
calls” is structured wrongly, according to the Stanford annotation
guidelines. The structure of that part of the sentence is how the
annotators were instructed to parse an example like “John's school
clothes”.
p
| It's worth dwelling on this point a bit. We could, in theory, have
| written our guidelines so that the “correct” parses were reversed.
| There's good reason to believe the parsing task will be harder if we
| reversed our convention, as it'd be less consistent with the rest of
| the grammar.
sup: a(href='#note-2') [2]
| But we could test that empirically, and we'd be pleased to gain an
| advantage by reversing the policy.
p
| We definitely do want that distinction in the guidelines — we don't
| want both to receive the same structure, or our output will be less
| useful. The annotation guidelines strike a balance between what
| distinctions downstream applications will find useful, and what
| parsers will be able to predict easily.
h4 Projective trees
p
| There's a particularly useful simplification that we can make, when
| deciding what we want the graph to look like: we can restrict the
| graph structures we'll be dealing with. This doesn't just give us a
| likely advantage in learnability; it can have deep algorithmic
| implications. We follow most work on English in constraining the
| dependency graphs to be
em projective trees
| :
ol
li Tree. Every word has exactly one head, except for the dummy ROOT symbol.
li
| Projective. Write each dependency with its endpoints in string order.
| Then for every pair of dependencies (a1, a2) and (b1, b2) with a1 < b1,
| either b2 <= a2 (nested) or b1 >= a2 (disjoint). In other words,
| dependencies cannot “cross”: you can't have a pair of dependencies
| that goes a1 b1 a2 b2, or b1 a1 b2 a2.
p
| There's a rich literature on parsing non-projective trees, and a
| smaller literature on parsing DAGs. But the parsing algorithm I'll
| be explaining deals with projective trees.
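p
| The crossing constraint is easy to check mechanically. Here is a small
| illustrative helper (not part of the parser below) that tests whether a
| heads array describes a projective tree:
pre.language-python
code
| def is_projective(heads):
|     '''heads[i] is the index of word i's head; use -1 for the root.'''
|     arcs = [tuple(sorted((child, head)))
|             for child, head in enumerate(heads) if head >= 0]
|     for a1, a2 in arcs:
|         for b1, b2 in arcs:
|             if a1 < b1 < a2 < b2:
|                 return False
|     return True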
h3 Greedy transition-based parsing
p
| Our parser takes as input a list of string tokens, and outputs a
| list of head indices, representing edges in the graph. If the
em i
| th member of heads is
em j
| , the dependency parse contains an edge (j, i). A transition-based
| parser is a finite-state transducer; it maps an array of N words
| onto an output array of N head indices:
table.center
tbody
tr
td
em start
td MSNBC
td reported
td that
td Facebook
td bought
td WhatsApp
td for
td $16bn
td
em root
tr
td 0
td 2
td 9
td 2
td 4
td 2
td 4
td 4
td 7
td 0
p
| The heads array denotes that the head of
em MSNBC
| is
em reported
| :
em MSNBC
| is word 1, and
em reported
| is word 2, and
code.language-python heads[1] == 2
| . You can already see why parsing a tree is handy — this data structure
| wouldn't work if we had to output a DAG, where words may have multiple
| heads.
p
| Although
code.language-python heads
| can be represented as an array, we'd actually like to maintain some
| alternate ways to access the parse, to make it easy and efficient to
| extract features. Our
code.language-python Parse
| class looks like this:
pre.language-python
code
| class Parse(object):
| def __init__(self, n):
| self.n = n
| self.heads = [None] * (n-1)
| self.lefts = []
| self.rights = []
| for i in range(n+1):
| self.lefts.append(DefaultList(0))
| self.rights.append(DefaultList(0))
|
| def add_arc(self, head, child):
| self.heads[child] = head
| if child < head:
| self.lefts[head].append(child)
| else:
| self.rights[head].append(child)
p
| As well as the parse, we also have to keep track of where we're up
| to in the sentence. We'll do this with an index into the
code.language-python words
| array, and a stack, to which we'll push words, before popping them
| once their head is set. So our state data structure is fundamentally:
ul
li An index, i, into the list of tokens;
li The dependencies added so far, in Parse
li
| A stack, containing words that occurred before i, for which we're
| yet to assign a head.
p Each step of the parsing process applies one of three actions to the state:
pre.language-python
code
| SHIFT = 0; RIGHT = 1; LEFT = 2
| MOVES = [SHIFT, RIGHT, LEFT]
|
| def transition(move, i, stack, parse):
| global SHIFT, RIGHT, LEFT
| if move == SHIFT:
| stack.append(i)
| return i + 1
| elif move == RIGHT:
| parse.add_arc(stack[-2], stack.pop())
| return i
| elif move == LEFT:
| parse.add_arc(i, stack.pop())
| return i
| raise GrammarError(&quot;Unknown move: %d&quot; % move)
p
| The
code.language-python LEFT
| and
code.language-python RIGHT
| actions add dependencies and pop the stack, while
code.language-python SHIFT
| pushes the stack and advances i into the buffer.
p.
So, the parser starts with an empty stack, and a buffer index at 0, with
no dependencies recorded. It chooses one of the (valid) actions, and
applies it to the state. It continues choosing actions and applying
them until the stack is empty and the buffer index is at the end of
the input. (It's hard to understand this sort of algorithm without
stepping through it. Try coming up with a sentence, drawing a projective
parse tree over it, and then try to reach the parse tree by choosing
the right sequence of transitions.)
p Here's what the parsing loop looks like in code:
pre.language-python
code
| class Parser(object):
| ...
| def parse(self, words):
| tags = self.tagger(words)
| n = len(words)
| idx = 1
| stack = [0]
| deps = Parse(n)
| while stack or idx < n:
| features = extract_features(words, tags, idx, n, stack, deps)
| scores = self.model.score(features)
| valid_moves = get_valid_moves(idx, n, len(stack))
| next_move = max(valid_moves, key=lambda move: scores[move])
| idx = transition(next_move, idx, stack, deps)
| return tags, deps
|
| def get_valid_moves(i, n, stack_depth):
| moves = []
| if i < n:
| moves.append(SHIFT)
| if stack_depth >= 2:
| moves.append(RIGHT)
| if stack_depth >= 1:
| moves.append(LEFT)
| return moves
p.
We start by tagging the sentence, and initializing the state. We then
map the state to a set of features, which we score using a linear model.
We then find the best-scoring valid move, and apply it to the state.
p
| The model scoring works the same as it did in
a(href=urls.post) the POS tagger.
| If you're confused about the idea of extracting features and scoring
| them with a linear model, you should review that post. Here's a reminder
| of how the model scoring works:
pre.language-python
code
| class Perceptron(object):
| ...
| def score(self, features):
| all_weights = self.weights
| scores = dict((clas, 0) for clas in self.classes)
| for feat, value in features.items():
| if value == 0:
| continue
| if feat not in all_weights:
| continue
| weights = all_weights[feat]
| for clas, weight in weights.items():
| scores[clas] += value * weight
| return scores
p.
It's just summing the class-weights for each feature. This is often
expressed as a dot-product, but when you're dealing with multiple
classes, that gets awkward, I find.
p.
The beam parser (RedShift) tracks multiple candidates, and only decides
on the best one at the very end. We're going to trade away accuracy
in favour of efficiency and simplicity. We'll only follow a single
analysis. Our search strategy will be entirely greedy, as it was with
the POS tagger. We'll lock-in our choices at every step.
p.
If you read the POS tagger post carefully, you might see the underlying
similarity. What we've done is mapped the parsing problem onto a
sequence-labelling problem, which we address using a “flat”, or unstructured,
learning algorithm (by doing greedy search).
h3 Features
p.
Feature extraction code is always pretty ugly. The features for the parser
refer to a few tokens from the context:
ul
li The first three words of the buffer (n0, n1, n2)
li The top three words of the stack (s0, s1, s2)
li The two leftmost children of s0 (s0b1, s0b2);
li The two rightmost children of s0 (s0f1, s0f2);
li The two leftmost children of n0 (n0b1, n0b2)
p.
For these 12 tokens, we refer to the word-form, the part-of-speech tag,
and the number of left and right children attached to the token.
p.
Because we're using a linear model, we have our features refer to pairs
and triples of these atomic properties.
pre.language-python
code
| def extract_features(words, tags, n0, n, stack, parse):
| def get_stack_context(depth, stack, data):
| if depth &gt;= 3:
| return data[stack[-1]], data[stack[-2]], data[stack[-3]]
| elif depth &gt;= 2:
| return data[stack[-1]], data[stack[-2]], ''
| elif depth == 1:
| return data[stack[-1]], '', ''
| else:
| return '', '', ''
|
| def get_buffer_context(i, n, data):
| if i + 1 &gt;= n:
| return data[i], '', ''
| elif i + 2 &gt;= n:
| return data[i], data[i + 1], ''
| else:
| return data[i], data[i + 1], data[i + 2]
|
| def get_parse_context(word, deps, data):
| if word == -1:
| return 0, '', ''
| deps = deps[word]
| valency = len(deps)
| if not valency:
| return 0, '', ''
| elif valency == 1:
| return 1, data[deps[-1]], ''
| else:
| return valency, data[deps[-1]], data[deps[-2]]
|
| features = {}
| # Set up the context pieces --- the word, W, and tag, T, of:
| # S0-2: Top three words on the stack
| # N0-2: First three words of the buffer
| # n0b1, n0b2: Two leftmost children of the first word of the buffer
| # s0b1, s0b2: Two leftmost children of the top word of the stack
| # s0f1, s0f2: Two rightmost children of the top word of the stack
|
| depth = len(stack)
| s0 = stack[-1] if depth else -1
|
| Ws0, Ws1, Ws2 = get_stack_context(depth, stack, words)
| Ts0, Ts1, Ts2 = get_stack_context(depth, stack, tags)
|
| Wn0, Wn1, Wn2 = get_buffer_context(n0, n, words)
| Tn0, Tn1, Tn2 = get_buffer_context(n0, n, tags)
|
| Vn0b, Wn0b1, Wn0b2 = get_parse_context(n0, parse.lefts, words)
| Vn0b, Tn0b1, Tn0b2 = get_parse_context(n0, parse.lefts, tags)
|
| Vn0f, Wn0f1, Wn0f2 = get_parse_context(n0, parse.rights, words)
| _, Tn0f1, Tn0f2 = get_parse_context(n0, parse.rights, tags)
|
| Vs0b, Ws0b1, Ws0b2 = get_parse_context(s0, parse.lefts, words)
| _, Ts0b1, Ts0b2 = get_parse_context(s0, parse.lefts, tags)
|
| Vs0f, Ws0f1, Ws0f2 = get_parse_context(s0, parse.rights, words)
| _, Ts0f1, Ts0f2 = get_parse_context(s0, parse.rights, tags)
|
| # Cap numeric features at 5?
| # String-distance
| Ds0n0 = min((n0 - s0, 5)) if s0 != 0 else 0
|
| features['bias'] = 1
| # Add word and tag unigrams
| for w in (Wn0, Wn1, Wn2, Ws0, Ws1, Ws2, Wn0b1, Wn0b2, Ws0b1, Ws0b2, Ws0f1, Ws0f2):
| if w:
| features['w=%s' % w] = 1
| for t in (Tn0, Tn1, Tn2, Ts0, Ts1, Ts2, Tn0b1, Tn0b2, Ts0b1, Ts0b2, Ts0f1, Ts0f2):
| if t:
| features['t=%s' % t] = 1
|
| # Add word/tag pairs
| for i, (w, t) in enumerate(((Wn0, Tn0), (Wn1, Tn1), (Wn2, Tn2), (Ws0, Ts0))):
| if w or t:
| features['%d w=%s, t=%s' % (i, w, t)] = 1
|
| # Add some bigrams
| features['s0w=%s, n0w=%s' % (Ws0, Wn0)] = 1
| features['wn0tn0-ws0 %s/%s %s' % (Wn0, Tn0, Ws0)] = 1
| features['wn0tn0-ts0 %s/%s %s' % (Wn0, Tn0, Ts0)] = 1
| features['ws0ts0-wn0 %s/%s %s' % (Ws0, Ts0, Wn0)] = 1
| features['ws0-ts0 tn0 %s/%s %s' % (Ws0, Ts0, Tn0)] = 1
| features['wt-wt %s/%s %s/%s' % (Ws0, Ts0, Wn0, Tn0)] = 1
| features['tt s0=%s n0=%s' % (Ts0, Tn0)] = 1
| features['tt n0=%s n1=%s' % (Tn0, Tn1)] = 1
|
| # Add some tag trigrams
| trigrams = ((Tn0, Tn1, Tn2), (Ts0, Tn0, Tn1), (Ts0, Ts1, Tn0),
| (Ts0, Ts0f1, Tn0), (Ts0, Ts0f1, Tn0), (Ts0, Tn0, Tn0b1),
| (Ts0, Ts0b1, Ts0b2), (Ts0, Ts0f1, Ts0f2), (Tn0, Tn0b1, Tn0b2),
| (Ts0, Ts1, Ts1))
| for i, (t1, t2, t3) in enumerate(trigrams):
| if t1 or t2 or t3:
| features['ttt-%d %s %s %s' % (i, t1, t2, t3)] = 1
|
| # Add some valency and distance features
| vw = ((Ws0, Vs0f), (Ws0, Vs0b), (Wn0, Vn0b))
| vt = ((Ts0, Vs0f), (Ts0, Vs0b), (Tn0, Vn0b))
| d = ((Ws0, Ds0n0), (Wn0, Ds0n0), (Ts0, Ds0n0), (Tn0, Ds0n0),
| ('t' + Tn0+Ts0, Ds0n0), ('w' + Wn0+Ws0, Ds0n0))
| for i, (w_t, v_d) in enumerate(vw + vt + d):
| if w_t or v_d:
| features['val/d-%d %s %d' % (i, w_t, v_d)] = 1
| return features
h3 Training
p.
Weights are learned using the same algorithm, averaged perceptron, that
we used for part-of-speech tagging. Its key strength is that it's an
online learning algorithm: examples stream in one-by-one, we make our
prediction, check the actual answer, and adjust our beliefs (weights)
if we were wrong.
p The training loop looks like this:
pre.language-python
code
| class Parser(object):
| ...
| def train_one(self, itn, words, gold_tags, gold_heads):
| n = len(words)
| i = 2; stack = [1]; parse = Parse(n)
| tags = self.tagger.tag(words)
| while stack or (i + 1) < n:
| features = extract_features(words, tags, i, n, stack, parse)
| scores = self.model.score(features)
| valid_moves = get_valid_moves(i, n, len(stack))
| guess = max(valid_moves, key=lambda move: scores[move])
| gold_moves = get_gold_moves(i, n, stack, parse.heads, gold_heads)
| best = max(gold_moves, key=lambda move: scores[move])
| self.model.update(best, guess, features)
| i = transition(guess, i, stack, parse)
| # Return number correct
| return len([i for i in range(n-1) if parse.heads[i] == gold_heads[i]])
p
| The most interesting part of the training process is in
code.language-python get_gold_moves.
| The performance of our parser is made possible by an advance by Goldberg
| and Nivre (2012), who showed that we'd been doing this wrong for years.
p
| In the POS-tagging post, I cautioned that during training you need to
| make sure you pass in the last two
em predicted
| tags as features for the current tag, not the last two
em gold
| tags. At test time you'll only have the predicted tags, so if you
| base your features on the gold sequence during training, your training
| contexts won't resemble your test-time contexts, so you'll learn the
| wrong weights.
p
| In parsing, the problem was that we didn't know
em how
| to pass in the predicted sequence! Training worked by taking the
| gold-standard tree, and finding a transition sequence that led to it.
| i.e., you got back a sequence of moves, with the guarantee that if
| you followed those moves, you'd get the gold-standard dependencies.
p
| The problem is, we didn't know how to define the “correct” move to
| teach a parser to make if it was in any state that
em wasn't
| along that gold-standard sequence. Once the parser had made a mistake,
| we didn't know how to train from that example.
p
| That was a big problem, because it meant that once the parser started
| making mistakes, it would end up in states unlike any in its training
| data &ndash; leading to yet more mistakes. The problem was specific
| to greedy parsers: once you use a beam, there's a natural way to do
| structured prediction.
p
| The solution seems obvious once you know it, like all the best breakthroughs.
| What we do is define a function that asks “How many gold-standard
| dependencies can be recovered from this state?”. If you can define
| that function, then you can apply each move in turn, and ask, “How
| many gold-standard dependencies can be recovered from
em this
| state?”. If the action you applied allows
em fewer
| gold-standard dependencies to be reached, then it is sub-optimal.
p That's a lot to take in.
p
| So we have this function
code Oracle(state)
| :
pre
code
| Oracle(state) = | gold_arcs ∩ reachable_arcs(state) |
p
| We also have a set of actions, each of which returns a new state.
| We want to know:
ul
li shift_cost = Oracle(state) - Oracle(shift(state))
li right_cost = Oracle(state) - Oracle(right(state))
li left_cost = Oracle(state) - Oracle(left(state))
p
| Now, at least one of those costs
em has
| to be zero. Oracle(state) is asking, “what's the cost of the best
| path forward?”, and the first action of that best path has to be
| shift, right, or left.
p
| It turns out that we can derive Oracle fairly simply for many transition
| systems. The derivation for the transition system we're using, Arc
| Hybrid, is in Goldberg and Nivre (2013).
p
| We're going to implement the oracle as a function that returns the
| zero-cost moves, rather than implementing a function Oracle(state).
| This prevents us from doing a bunch of costly copy operations.
| Hopefully the reasoning in the code isn't too hard to follow, but
| you can also consult Goldberg and Nivre's papers if you're confused
| and want to get to the bottom of this.
pre.language-python
code
| def get_gold_moves(n0, n, stack, heads, gold):
| def deps_between(target, others, gold):
| for word in others:
| if gold[word] == target or gold[target] == word:
| return True
| return False
|
| valid = get_valid_moves(n0, n, len(stack))
| if not stack or (SHIFT in valid and gold[n0] == stack[-1]):
| return [SHIFT]
| if gold[stack[-1]] == n0:
| return [LEFT]
| costly = set([m for m in MOVES if m not in valid])
| # If the word behind s0 is its gold head, Left is incorrect
| if len(stack) >= 2 and gold[stack[-1]] == stack[-2]:
| costly.add(LEFT)
| # If there are any dependencies between n0 and the stack,
| # pushing n0 will lose them.
| if SHIFT not in costly and deps_between(n0, stack, gold):
| costly.add(SHIFT)
| # If there are any dependencies between s0 and the buffer, popping
| # s0 will lose them.
| if deps_between(stack[-1], range(n0+1, n-1), gold):
| costly.add(LEFT)
| costly.add(RIGHT)
| return [m for m in MOVES if m not in costly]
p
| Doing this “dynamic oracle” training procedure makes a big difference
| to accuracy — typically 1-2%, with no difference to the way the run-time
| works. The old “static oracle” greedy training procedure is fully
| obsolete; there's no reason to do it that way any more.
h3 Conclusion
p
| I have the sense that language technologies, particularly those relating
| to grammar, are particularly mysterious. I can imagine having no idea
| what the program might even do.
p
| I think it therefore seems natural to people that the best solutions
| would be overwhelmingly complicated. A 200,000 line Java package
| feels appropriate.
p
| But, algorithmic code is usually short, when only a single algorithm
| is implemented. And when you only implement one algorithm, and you
| know exactly what you want to write before you write a line, you
| also don't pay for any unnecessary abstractions, which can have a
| big performance impact.
h3 Notes
p
a(name='note-1')
| [1] I wasn't really sure how to count the lines of code in the Stanford
| parser. Its jar file ships over 200k, but there are a lot of different
| models in it. It's not important, but it's certainly over 4k.
p
a(name='note-2')
| [2] For instance, how would you parse, “John's school of music calls”?
| You want to make sure the phrase “John's school” has a consistent
| structure in both “John's school calls” and “John's school of music
| calls”. Reasoning about the different “slots” you can put a phrase
| into is a key way we reason about what syntactic analyses look like.
| You can think of each phrase as having a different shaped connector,
| which you need to plug into different slots — which each phrase also
| has a certain number of, each of a different shape. We're trying to
| figure out what connectors are where, so we can figure out how the
| sentences are put together.
h3 Idle speculation
p
| For a long time, incremental language processing algorithms were
| primarily of scientific interest. If you want to write a parser to
| test a theory about how the human sentence processor might work, well,
| that parser needs to build partial interpretations. There's a wealth
| of evidence, including commonsense introspection, that establishes
| that we don't buffer input and analyse it once the speaker has finished.
p
| But now algorithms with that neat scientific feature are winning!
| As best as I can tell, the secret to that success is to be:
ul
li Incremental. Earlier words constrain the search.
li
| Error-driven. Training involves a working hypothesis, which is
| updated as it makes mistakes.
p
| The links to human sentence processing seem tantalising. I look
| forward to seeing whether these engineering breakthroughs lead to
| any psycholinguistic advances.
h3 Bibliography
p
| The NLP literature is almost entirely open access. All of the relevant
| papers can be found
a(href=urls.acl_anthology, rel='nofollow') here
| .
p
| The parser I've described is an implementation of the dynamic-oracle
| Arc-Hybrid system here:
span.bib-item
| Goldberg, Yoav; Nivre, Joakim.
em Training Deterministic Parsers with Non-Deterministic Oracles
| . TACL 2013
p
| However, I wrote my own features for it. The arc-hybrid system was
| originally described here:
span.bib-item
| Kuhlmann, Marco; Gomez-Rodriguez, Carlos; Satta, Giorgio. Dynamic
| programming algorithms for transition-based dependency parsers. ACL 2011
p
| The dynamic oracle training method was first described here:
span.bib-item
| A Dynamic Oracle for Arc-Eager Dependency Parsing. Goldberg, Yoav;
| Nivre, Joakim. COLING 2012
p
| This work depended on a big break-through in accuracy for transition-based
| parsers, when beam-search was properly explored by Zhang and Clark.
| They have several papers, but the preferred citation is:
span.bib-item
| Zhang, Yue; Clark, Stephen. Syntactic Processing Using the Generalized
| Perceptron and Beam Search. Computational Linguistics 2011 (1)
p
| Another important paper was this little feature engineering paper,
| which further improved the accuracy:
span.bib-item
| Zhang, Yue; Nivre, Joakim. Transition-based Dependency Parsing with
| Rich Non-local Features. ACL 2011
p
| The generalised perceptron, which is the learning framework for these
| beam parsers, is from this paper:
span.bib-item
| Collins, Michael. Discriminative Training Methods for Hidden Markov
| Models: Theory and Experiments with Perceptron Algorithms. EMNLP 2002
h3 Experimental details
p
| The results at the start of the post refer to Section 22 of the Wall
| Street Journal corpus. The Stanford parser was run as follows:
pre.language-bash
code
| java -mx10000m -cp "$scriptdir/*:" edu.stanford.nlp.parser.lexparser.LexicalizedParser \
| -outputFormat "penn" edu/stanford/nlp/models/lexparser/englishFactored.ser.gz $*
p
| A small post-process was applied, to undo the fancy tokenisation
| Stanford adds for numbers, to make them match the PTB tokenisation:
pre.language-python
code
| """Stanford parser retokenises numbers. Split them."""
| import sys
| import re
|
| qp_re = re.compile('\xc2\xa0')
| for line in sys.stdin:
| line = line.rstrip()
| if qp_re.search(line):
| line = line.replace('(CD', '(QP (CD', 1) + ')'
| line = line.replace('\xc2\xa0', ') (CD ')
| print line
p
| The resulting PTB-format files were then converted into dependencies
| using the Stanford converter:
pre.language-bash
code
| ./scripts/train.py -x zhang+stack -k 8 -p ~/data/stanford/train.conll ~/data/parsers/tmp
| ./scripts/parse.py ~/data/parsers/tmp ~/data/stanford/devi.txt /tmp/parse/
| ./scripts/evaluate.py /tmp/parse/parses ~/data/stanford/dev.conll
p
| I can't easily read that anymore, but it should just convert every
| .mrg file in a folder to a CoNLL-format Stanford basic dependencies
| file, using the settings common in the dependency literature.
p
| I then converted the gold-standard trees from WSJ 22, for the evaluation.
| Accuracy scores refer to unlabelled attachment score (i.e. the head index)
| of all non-punctuation tokens.
p
| To train parser.py, I fed the gold-standard PTB trees for WSJ 02-21
| into the same conversion script.
p
| In a nutshell: The Stanford model and parser.py are trained on the
| same set of sentences, and they each make their predictions on a
| held-out test set, for which we know the answers. Accuracy refers
| to how many of the words' heads we got correct.
p
| Speeds were measured on a 2.4GHz Xeon. I ran the experiments on a
| server, to give the Stanford parser more memory. The parser.py system
| runs fine on my MacBook Air. I used PyPy for the parser.py experiments;
| CPython was about half as fast on an early benchmark.
p
| One of the reasons parser.py is so fast is that it does unlabelled
| parsing. Based on previous experiments, a labelled parser would likely
| be about 40x slower, and about 1% more accurate. Adapting the program
| to labelled parsing would be a good exercise for the reader, if you
| have access to the data.
p
| The result from the Redshift parser was produced from commit
code.language-python b6b624c9900f3bf
| , which was run as follows:
pre.language-bash
code
| ./scripts/train.py -x zhang+stack -k 8 -p ~/data/stanford/train.conll ~/data/parsers/tmp
| ./scripts/parse.py ~/data/parsers/tmp ~/data/stanford/devi.txt /tmp/parse/
| ./scripts/evaluate.py /tmp/parse/parses ~/data/stanford/dev.conll
footer.meta(role='contentinfo')
a.button.button-twitter(href=urls.share_twitter, title='Share on Twitter', rel='nofollow') Share on Twitter
.discuss
a.button.button-hn(href='#', title='Discuss on Hacker News', rel='nofollow') Discuss on Hacker News
|
a.button.button-reddit(href='#', title='Discuss on Reddit', rel='nofollow') Discuss on Reddit


@@ -1,492 +0,0 @@
extends ./template_post.jade
block body_block
- var urls = {}
- urls.share_twitter = "http://twitter.com/share?text=[ARTICLE HEADLINE]&url=[ARTICLE LINK]&via=honnibal"
article.post
header
h2 A good Part-of-Speech tagger in about 200 lines of Python
.subhead
| by
a(href="#" rel="author") Matthew Honnibal
| on
time(datetime='2013-09-11') September 11, 2013
p.
Up-to-date knowledge about natural language processing is mostly locked away
in academia. And academics are mostly pretty self-conscious when we write.
We're careful. We don't want to stick our necks out too much. But under-confident
recommendations suck, so here's how to write a good part-of-speech tagger.
p.
There are a tonne of “best known techniques” for POS tagging, and you should
ignore the others and just use Averaged Perceptron.
p.
You should use two tags of history, and features derived from the Brown word
clusters distributed here.
p.
If you only need the tagger to work on carefully edited text, you should
use case-sensitive features, but if you want a more robust tagger you
should avoid them because they'll make you over-fit to the conventions
of your training domain. Instead, features that ask “how frequently is
this word title-cased, in a large sample from the web?” work well. Then
you can lower-case your comparatively tiny training corpus.
p.
For efficiency, you should figure out which frequent words in your training
data have unambiguous tags, so you don't have to do anything but output
their tags when they come up. About 50% of the words can be tagged that way.
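p.
A sketch of that pre-computation (illustrative only; the thresholds and the
make_tagdict name are mine, not the tagger's):
pre.language-python
code
| from collections import defaultdict
|
| def make_tagdict(tagged_sents, freq_thresh=20, ambiguity_thresh=0.97):
|     '''Map frequent, nearly-unambiguous words straight to their tag.'''
|     counts = defaultdict(lambda: defaultdict(int))
|     for sent in tagged_sents:
|         for word, tag in sent:
|             counts[word][tag] += 1
|     tagdict = {}
|     for word, tag_freqs in counts.items():
|         tag, mode = max(tag_freqs.items(), key=lambda item: item[1])
|         total = sum(tag_freqs.values())
|         if total >= freq_thresh and float(mode) / total >= ambiguity_thresh:
|             tagdict[word] = tag
|     return tagdict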
p.
And unless you really, really can't do without an extra 0.1% of accuracy,
you probably shouldn't bother with any kind of search strategy; you should
just use a greedy model.
p.
If you do all that, you'll find your tagger easy to write and understand,
and an efficient Cython implementation will perform as follows on the standard
evaluation, 130,000 words of text from the Wall Street Journal:
table
thead
tr
th Tagger
th Accuracy
th Time (130k words)
tbody
tr
td CyGreedyAP
td 97.1%
td 4s
p.
The 4s includes initialisation time — the actual per-token speed is high
enough to be irrelevant; it won't be your bottleneck.
p.
It's tempting to look at 97% accuracy and say something similar, but that's
not true. My parser is about 1% more accurate if the input has hand-labelled
POS tags, and the taggers all perform much worse on out-of-domain data.
Unfortunately accuracies have been fairly flat for the last ten years.
That's why my recommendation is to just use a simple and fast tagger that's
roughly as good.
p.
The thing is though, it's very common to see people using taggers that
aren't anywhere near that good! For an example of what a non-expert is
likely to use, these were the two taggers wrapped by TextBlob, a new Python
API that I think is quite neat:
table
thead
tr
th Tagger
th Accuracy
th Time (130k words)
tbody
tr
td NLTK
td 94.0%
td 3m56s
tr
td Pattern
td 93.5%
td 26s
p.
Both Pattern and NLTK are very robust and beautifully well documented, so
the appeal of using them is obvious. But Pattern's algorithms are pretty
crappy, and NLTK carries tremendous baggage around in its implementation
because of its massive framework, and double-duty as a teaching tool.
p.
As a stand-alone tagger, my Cython implementation is needlessly complicated
&ndash; it was written for my parser. So today I wrote a 200 line version
of my recommended algorithm for TextBlob. It gets:
table
thead
tr
th Tagger
th Accuracy
th Time (130k words)
tbody
tr
td PyGreedyAP
td 96.8%
td 12s
p.
I traded some accuracy and a lot of efficiency to keep the implementation
simple. Here's a far-too-brief description of how it works.
h3 Averaged perceptron
p.
POS tagging is a “supervised learning problem”. You're given a table of data,
and you're told that the values in the last column will be missing during
run-time. You have to find correlations from the other columns to predict
that value.
p.
So for us, the missing column will be “part of speech at word i”. The predictor
columns (features) will be things like “part of speech at word i-1”, “last three
letters of word at i+1”, etc.
p.
First, here's what prediction looks like at run-time:
pre.language-python
code
| def predict(self, features):
| '''Dot-product the features and current weights and return the best class.'''
| scores = defaultdict(float)
| for feat in features:
| if feat not in self.weights:
| continue
| weights = self.weights[feat]
| for clas, weight in weights.items():
| scores[clas] += weight
| # Do a secondary alphabetic sort, for stability
| return max(self.classes, key=lambda clas: (scores[clas], clas))
p.
Earlier I described the learning problem as a table, with one of the columns
marked as missing-at-runtime. For NLP, our tables are always exceedingly
sparse. You have columns like “word i-1=Parliament”, which is almost always
0. So our “weight vectors” can pretty much never be implemented as vectors.
Map-types are good though — here we use dictionaries.
p.
The input data, features, is a set with a member for every non-zero “column”
in our “table” &ndash; every active feature. Usually this is actually a dictionary,
to let you set values for the features. But here all my features are binary
present-or-absent type deals.
p.
The weights data-structure is a dictionary of dictionaries, that ultimately
associates feature/class pairs with some weight. You want to structure it
this way instead of the reverse because of the way word frequencies are
distributed: most words are rare, frequent words are very frequent.
h3 Learning the weights
p.
Okay, so how do we get the values for the weights? We start with an empty
weights dictionary, and iteratively do the following:
ol
li Receive a new (features, POS-tag) pair
li Guess the value of the POS tag given the current “weights” for the features
li If guess is wrong, add +1 to the weights associated with the correct class for these features, and -1 to the weights for the predicted class.
p.
It's one of the simplest learning algorithms. Whenever you make a mistake,
increment the weights for the correct class, and penalise the weights that
led to your false prediction. In code:
pre.language-python
code
| def train(self, nr_iter, examples):
| for i in range(nr_iter):
| for features, true_tag in examples:
| guess = self.predict(features)
| if guess != true_tag:
| for f in features:
| self.weights[f][true_tag] += 1
| self.weights[f][guess] -= 1
| random.shuffle(examples)
p.
If you iterate over the same example this way, the weights for the correct
class would have to come out ahead, and you'd get the example right. If
you think about what happens with two examples, you should be able to
see that it will get them both right unless the features are identical.
In general the algorithm will converge so long as the examples are
linearly separable, although that doesn't matter for our purpose.
h3 Averaging the weights
p.
We need to do one more thing to make the perceptron algorithm competitive.
The problem with the algorithm so far is that if you train it twice on
slightly different sets of examples, you end up with really different models.
It doesn't generalise that smartly. And the problem is really in the later
iterations — if you let it run to convergence, it'll pay lots of attention
to the few examples it's getting wrong, and mutate its whole model around
them.
p.
So, what we're going to do is make the weights more "sticky" &ndash; give
the model less chance to ruin all its hard work in the later rounds. And
we're going to do that by returning the averaged weights, not the final
weights.
p.
I doubt there are many people who are convinced that's the most obvious
solution to the problem, but whatever. We're not here to innovate, and this
way is time-tested on lots of problems. If you have another idea, run the
experiments and tell us what you find. Actually I'd love to see more work
on this, now that the averaged perceptron has become such a prominent learning
algorithm in NLP.
p.
Okay. So this averaging. How's that going to work? Note that we don't want
to just average after each outer-loop iteration. We want the average of all
the values — from the inner loop. So if we have 5,000 examples, and we train
for 10 iterations, we'll average across 50,000 values for each weight.
p.
Obviously we're not going to store all those intermediate values. Instead,
we'll track an accumulator for each weight, and divide it by the number of
iterations at the end. Again: we want the average weight assigned to a
feature/class pair during learning, so the key component we need is the total
weight it was assigned. But we want to be careful about how we compute
that accumulator, too. On almost any instance, we're going to see a tiny
fraction of active feature/class pairs. All the other feature/class weights
won't change. So we shouldn't have to go back and add the unchanged value
to our accumulators anyway, like chumps.
p.
Since we're not chumps, we'll make the obvious improvement. We'll maintain
another dictionary that tracks how long each weight has gone unchanged. Now
when we do change a weight, we can do a fast-forwarded update to the accumulator,
for all those iterations where it lay unchanged.
p.
Here's what a weight update looks like now that we have to maintain the
totals and the time-stamps:
pre.language-python
code
| def update(self, truth, guess, features):
| def upd_feat(c, f, v):
| nr_iters_at_this_weight = self.i - self._timestamps[f][c]
| self._totals[f][c] += nr_iters_at_this_weight * self.weights[f][c]
| self.weights[f][c] += v
| self._timestamps[f][c] = self.i
| self.i += 1
| for f in features:
| upd_feat(truth, f, 1.0)
| upd_feat(guess, f, -1.0)
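p.
The averaging itself then happens once, after the final iteration. Here's a
sketch of that last step &ndash; not the literal code, but it relies only on
the <code>_totals</code>, <code>_timestamps</code> and <code>i</code> counters
maintained by the update above:
pre.language-python
code
| def average_weights(self):
|     '''Average each weight over every update step seen (a sketch).'''
|     for feat, weights in self.weights.items():
|         new_weights = {}
|         for clas, weight in weights.items():
|             # Fast-forward the accumulator to the final time-step first
|             total = self._totals[feat][clas]
|             total += (self.i - self._timestamps[feat][clas]) * weight
|             averaged = round(total / float(self.i), 3)
|             if averaged:
|                 new_weights[clas] = averaged
|         self.weights[feat] = new_weights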
h3 Features and pre-processing
p.
The POS tagging literature has tonnes of intricate features sensitive to
case, punctuation, etc. They help on the standard test-set, which is from
Wall Street Journal articles from the 1980s, but I don't see how they'll
help us learn models that are useful on other text.
p.
To help us learn a more general model, we'll pre-process the data prior
to feature extraction, as follows:
ul
li All words are lower cased;
li Digits in the range 1800-2100 are represented as !YEAR;
li Other digit strings are represented as !DIGITS
li
| It would be better to have a module recognising dates, phone numbers,
| emails, hash-tags, etc. but that will have to be pushed back into the
| tokenization.
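p.
In code, the pre-processing above might look something like this. It's a
sketch of the <code>_normalize</code> helper the training loop below calls,
not necessarily the exact version:
pre.language-python
code
| def _normalize(self, word):
|     '''Apply the pre-processing rules listed above (a sketch).'''
|     if word.isdigit() and 1800 <= int(word) <= 2100:
|         return '!YEAR'
|     elif word.isdigit():
|         return '!DIGITS'
|     else:
|         return word.lower()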
p.
I played around with the features a little, and this seems to be a reasonable
bang-for-buck configuration in terms of getting the development-data accuracy
to 97% (where it typically converges anyway), and having a smaller memory
foot-print:
pre.language-python
code
| def _get_features(self, i, word, context, prev, prev2):
| '''Map tokens-in-contexts into a feature representation, implemented as a
| set. If the features change, a new model must be trained.'''
| def add(name, *args):
| features.add('+'.join((name,) + tuple(args)))
| features = set()
| add('bias') # This acts sort of like a prior
| add('i suffix', word[-3:])
| add('i pref1', word[0])
| add('i-1 tag', prev)
| add('i-2 tag', prev2)
| add('i tag+i-2 tag', prev, prev2)
| add('i word', context[i])
| add('i-1 tag+i word', prev, context[i])
| add('i-1 word', context[i-1])
| add('i-1 suffix', context[i-1][-3:])
| add('i-2 word', context[i-2])
| add('i+1 word', context[i+1])
| add('i+1 suffix', context[i+1][-3:])
| add('i+2 word', context[i+2])
| return features
p.
I haven't added any features from external data, such as case frequency
statistics from the Google Web 1T corpus. I might add those later, but for
now I figured I'd keep things simple.
h3 What about search?
p.
The model I've recommended commits to its predictions on each word, and
moves on to the next one. Those predictions are then used as features for
the next word. There's a potential problem here, but it turns out it doesn't
matter much. It's easy to fix with beam-search, but I say it's not really
worth bothering. And it definitely doesn't matter enough to adopt a slow
and complicated algorithm like Conditional Random Fields.
p.
Here's the problem. The best indicator for the tag at position, say, 3 in
a sentence is the word at position 3. But the next-best indicators are the
tags at positions 2 and 4. So there's a chicken-and-egg problem: we want
the predictions for the surrounding words in hand before we commit to a
prediction for the current word. Here's an example where search might matter:
p.example.
Their management plan reforms worked
p.
Depending on just what you've learned from your training data, you can
imagine making a different decision if you started at the left and moved
right, conditioning on your previous decisions, than if you'd started at
the right and moved left.
p.
If that's not obvious to you, think about it this way: “worked” is almost
surely a verb, so if you tag “reforms” with that in hand, you'll have a
different idea of its tag than if you'd just come from “plan”, which you
might have regarded as either a noun or a verb.
p.
Search can only help you when you make a mistake. It can prevent that error
from throwing off your subsequent decisions, or sometimes your future choices
will correct the mistake. And that's why for POS tagging, search hardly matters!
Your model is so good straight-up that your past predictions are almost always
true. So you really need the planets to align for search to matter at all.
p.
And as we improve our taggers, search will matter less and less. Instead
of search, what we should be caring about is multi-tagging. If we let the
model be a bit uncertain, we can get over 99% accuracy assigning an average
of 1.05 tags per word (Vadas et al., ACL 2006). The averaged perceptron is
rubbish at multi-tagging, though. That's its big weakness. You really want
a probability distribution for that.
p.
One caveat when doing greedy search, though. It's very important that your
training data model the fact that the history will be imperfect at run-time.
Otherwise, it will be way over-reliant on the tag-history features. Because
the Perceptron is iterative, this is very easy.
p.
Here's the training loop for the tagger:
pre.language-python
code
| def train(self, sentences, save_loc=None, nr_iter=5, quiet=False):
| '''Train a model from sentences, and save it at save_loc. nr_iter
| controls the number of Perceptron training iterations.'''
| self._make_tagdict(sentences, quiet=quiet)
| self.model.classes = self.classes
| prev, prev2 = START
| for iter_ in range(nr_iter):
| c = 0; n = 0
| for words, tags in sentences:
| context = START + [self._normalize(w) for w in words] + END
| for i, word in enumerate(words):
| guess = self.tagdict.get(word)
| if not guess:
| feats = self._get_features(
| i, word, context, prev, prev2)
| guess = self.model.predict(feats)
| self.model.update(tags[i], guess, feats)
| # Set the history features from the guesses, not the
| # true tags
| prev2 = prev; prev = guess
| c += guess == tags[i]; n += 1
| random.shuffle(sentences)
| if not quiet:
| print(&quot;Iter %d: %d/%d=%.3f&quot; % (iter_, c, n, _pc(c, n)))
| self.model.average_weights()
| # Pickle as a binary file
| if save_loc is not None:
| cPickle.dump((self.model.weights, self.tagdict, self.classes),
| open(save_loc, 'wb'), -1)
p.
Unlike the previous snippets, this one's literal &ndash; I tended to edit the
previous ones to simplify. So if they have bugs, hopefully that's why!
p.
At the time of writing, I'm just finishing up the implementation before I
submit a pull request to TextBlob. You can see the rest of the source here:
ul
li
a(href="https://github.com/sloria/textblob-aptagger/blob/master/textblob_aptagger/taggers.py") taggers.py
li
a(href="https://github.com/sloria/textblob-aptagger/blob/master/textblob_aptagger/_perceptron.py") _perceptron.py
h3 A final comparison…
p.
Over the years I've seen a lot of cynicism about the WSJ evaluation methodology.
The claim is that we've just been meticulously over-fitting our methods to this
data. Actually the evidence doesn't really bear this out. Mostly, if a technique
is clearly better on one evaluation, it improves others as well. Still, it's
very reasonable to want to know how these tools perform on other text. So I
ran the unchanged models over two other sections from the OntoNotes corpus:
table
thead
tr
th Tagger
th WSJ
th ABC
th Web
tbody
tr
td Pattern
td 93.5
td 90.7
td 88.1
tr
td NLTK
td 94.0
td 91.5
td 88.4
tr
td PyGreedyAP
td 96.8
td 94.8
td 91.8
p.
The ABC section is broadcast news, and Web is text from the web (blogs etc. — I haven't
looked at the data much).
p.
As you can see, the order of the systems is stable across the three comparisons,
and the advantage of our Averaged Perceptron tagger over the other two is real
enough. Actually the Pattern tagger does very poorly on out-of-domain text.
It mostly just looks up the words, so it's very domain-dependent. I hadn't
realised it before, but it's obvious enough now that I think about it.
p.
We can improve our score greatly by training on some of the foreign data.
The technique described in this paper (Daume III, 2007) is the first thing
I try when I have to do that.
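p.
The heart of that technique is just feature augmentation: every feature gets
a "general" copy plus a copy marked with the domain it came from, so the
model can learn which cues carry across domains. A rough sketch, not code
from the paper:
pre.language-python
code
| def augment(features, domain):
|     '''Duplicate each feature into a general copy and a domain-marked copy.'''
|     augmented = set()
|     for feat in features:
|         augmented.add('general+' + feat)
|         augmented.add(domain + '+' + feat)
|     return augmented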
footer.meta(role='contentinfo')
a.button.button-twitter(href=urls.share_twitter, title='Share on Twitter', rel='nofollow') Share on Twitter
.discuss
a.button.button-hn(href='#', title='Discuss on Hacker News', rel='nofollow') Discuss on Hacker News
|
a.button.button-reddit(href='#', title='Discuss on Reddit', rel='nofollow') Discuss on Reddit

View File

@ -1,139 +0,0 @@
- var urls = {}
- urls.choi_paper = "http://aclweb.org/anthology/P/P15/P15-1038.pdf"
- urls.emnlp_paper = "honnibal_johnson_emnlp2015.pdf"
+comparison("NLTK")
p spaCy is:
ul
li.pro 100x faster;
li.pro 50% more accurate;
li.pro Serializes TODO% smaller;
p spaCy features:
ul
li.pro Integrated word vectors;
li.pro Efficient binary serialization;
p NLTK features:
ul
li.con Multiple languages;
li.neutral Educational resources
//+comparison("Pattern")
+comparison("CoreNLP")
p spaCy is:
ul
li.pro TODO% faster;
li.pro TODO% more accurate;
li.pro Not Java;
li.pro Well documented;
li.pro Cheaper to license commercially;
li.neutral
| Opinionated/Minimalist. spaCy avoids providing redundant or overlapping
| options.
p CoreNLP features:
ul
li.con Multiple Languages;
li.con Sentiment analysis
li.con Coreference resolution
+comparison("ClearNLP")
p spaCy is:
ul
li.pro Not Java;
li.pro TODO% faster;
li.pro Well documented;
li.neutral Slightly more accurate;
p ClearNLP features:
ul
li.con Semantic Role Labelling
li.con Multiple Languages
li.con Model for biology/life-science;
//+comparison("Accuracy Summary")
//+comparison("Speed Summary")
// table
// thead
// tr
// th.
// th(colspan=3) Absolute (ms per doc)
// th(colspan=3) Relative (to spaCy)
//
// tbody
// tr
// td: strong System
// td: strong Split
// td: strong Tag
// td: strong Parse
// td: strong Split
// td: strong Tag
// td: strong Parse
//
// +row("spaCy", "0.2ms", "1ms", "19ms", "1x", "1x", "1x")
// +row("spaCy", "0.2ms", "1ms", "19ms", "1x", "1x", "1x")
// +row("CoreNLP", "2ms", "10ms", "49ms", "10x", "10x", "2.6x")
// +row("ZPar", "1ms", "8ms", "850ms", "5x", "8x", "44.7x")
// +row("NLTK", "4ms", "443ms", "n/a", "20x", "443x", "n/a")
//
// p
// | <strong>Set up</strong>: 100,000 plain-text documents were streamed
// | from an SQLite3 database, and processed with an NLP library, to one
// | of three levels of detail &ndash; tokenization, tagging, or parsing.
// | The tasks are additive: to parse the text you have to tokenize and
// | tag it. The pre-processing was not subtracted from the times &ndash;
// | I report the time required for the pipeline to complete. I report
// | mean times per document, in milliseconds.
//
// p
// | <strong>Hardware</strong>: Intel i7-3770 (2012)
+comparison("Peer-reviewed Evaluations")
p.
spaCy is committed to rigorous evaluation under standard methodology. Two
papers in 2015 confirm that:
ol
li spaCy is the fastest syntactic parser in the world;
li Its accuracy is within 1% of the best available;
li The few systems that are more accurate are 20&times; slower or more.
p
| spaCy v0.84 was evaluated by researchers at Yahoo! Labs and Emory University,
| as part of a survey paper benchmarking the current state-of-the-art dependency
| parsers
a(href=urls.choi_paper) (Choi et al., 2015)
| .
table
thead
+columns("System", "Language", "Accuracy", "Speed")
tbody
+row("spaCy v0.84", "Cython", "90.6", "13,963")
+row("spaCy v0.89", "Cython", "91.8", "13,000 (est.)")
+row("ClearNLP", "Java", "91.7", "10,271")
+row("CoreNLP", "Java", "89.6", "8,602")
+row("MATE", "Java", "92.5", "550")
+row("Turbo", "C++", "92.4", "349")
+row("Yara", "Java", "92.3", "340")
p
| Discussion with the authors led to accuracy improvements in spaCy, which
| have been accepted for publication in EMNLP, in joint work with Macquarie
| University
a(href=urls.emnlp_paper) (Honnibal and Johnson, 2015)
| .

View File

@ -1,129 +0,0 @@
extends ./outline.jade
include ./mixins.jade
mixin declare_class(name)
details
summary
span.declaration
span.label class
code #{name}
block
mixin method(name, parameters)
details(open=attributes.open)
summary
span.declaration
span.label #{name}
span.parameters
| self, #{parameters}
block
mixin params
ul
block
mixin param(name, type, value)
li
if type
<strong>#{name}</strong> (!{type}) &#8211;
else
<strong>#{name}</strong> &#8211;
block
mixin attribute(name, type, value)
details(open=attributes.open)
summary
span.declaration
span.label #{name}
block
mixin returns(name, type, value)
li
if type
<strong>#{name}</strong> (!{type}) &#8211;
else
<strong>#{name}</strong> &#8211;
block
mixin returns(type)
| tmp
mixin init
details
summary: h4 Init
block
mixin callable
details
summary: h4 Callable
block
mixin sequence
details
summary: h4 Sequence
block
mixin maptype
details
summary: h4 Map
block
mixin summary
block
mixin en_example
pre.language-python
code
| from spacy.en import English
| from spacy._doc_examples import download_war_and_peace
|
| unprocessed_unicode = download_war_and_peace()
|
| nlp = English()
| doc = nlp(unprocessed_unicode)
block intro_block
section(class="intro")
nav(role="navigation")
ul
li: a(href="#api" class="button") API
li: a(href="#tutorials" class="button") Tutorials
li: a(href="#spec" class="button") Spec
block body_block
- var py_docs = '<a class="reference" href="http://docs.python.org/library/'
-
var types = {
'unicode': py_docs + 'functions.html#unicode"><em>unicode</em></a>',
'bool': py_docs + 'functions.html#bool"><em>bool</em></a>',
'int': py_docs + 'functions.html#int"><em>int</em></a>',
'generator': "",
'Vocab': "",
'Span': "",
'Doc': ""
}
article
+Section("API", "api", "api.jade")
+Section("Tutorials", "tutorials", "tutorials.jade")
+Section("Annotation Specifications", "spec", "spec.jade")

View File

@ -1,88 +0,0 @@
extends ./outline.jade
include ./mixins.jade
// Notes
//
// 1. Where to put version notice? Should say something like
// 2015-08-12: v0.89
// and be a link
//
// Only needs to appear on home page.
- var slogan = "Build Tomorrow's Language Technologies"
- var tag_line = "spaCy &ndash; " + slogan
mixin lede
- var state_of_the_art = '<a href="#">state-of-the-art</a>'
- var a_minor_miracle = '<a href="">a minor miracle</a>'
- var great_documentation = '<a href="">great documentation</a>'
- var concise_API = '<a href="">concise API</a>'
p.
<a href="https://github.com/honnibal/spaCy"><strong>spaCy</strong></a> is a
library for industrial-strength natural language processing in Python and
Cython. It features !{state_of_the_art} speed and accuracy, a !{concise_API},
and <a href="#license">license terms</a> designed to get out of your way.
If you're a small company doing NLP, we want <strong>spaCy</strong> to seem
like !{a_minor_miracle}.
mixin comparison(name)
details
summary
h4= name
block
mixin columns(...names)
tr
each name in names
th= name
mixin row(...cells)
tr
each cell in cells
td= cell
mixin social
footer(role="contentinfo")
a(href="http://twitter.com/share?text=[ARTICLE HEADLINE]&url=[ARTICLE LINK]&via=honnibal" title="Share on Twitter" rel="nofollow" class="button button-twitter") Share on Twitter
div.discuss
a(href="#" title="Discuss on Hacker News" rel="nofollow" class="button button-hn")
| Discuss on Hacker News
a(href="#" title="Discuss on Reddit" rel="nofollow" class="button button-reddit")
| Discuss on Reddit
block intro_block
section(class="intro")
+lede
nav(role="navigation")
ul
li: a(href="#example-use" class="button") Examples
li: a(href="#comparisons" class="button") Comparisons
li: a(href="#online-demo" class="button") Try Online
li: a(href="#install" class="button")
| Install
<span class="button-caption">v0.89</span>
block body_block
article(class="page landing-page")
+Section("Usage by Example", "example-use", "./usage_examples.jade")
+Section("Comparisons and Benchmarks", "comparisons", "./comparisons.jade")
+Section("Online Demo", "online-demo", "./online_demo.jade")
+Section("Install", "install", "./install.jade")

View File

@ -1,71 +0,0 @@
mixin Option(name, open)
details(open=open)
summary
h4= name
block
+Option("conda", true)
pre.language-bash: code
| $ conda install spacy
| $ python -m spacy.en.download
+Option("pip and virtualenv", true)
p With Python 2.7 or Python 3, using Linux or OSX, run:
pre.language-bash: code
| $ pip install spacy
| $ python -m spacy.en.download
p
| The download command fetches and installs about 300mb of data, for
| the parser model and word vectors, which it installs within the spacy.en
| package directory.
+Option("Workaround for obsolete system Python", false)
p
| If you're stuck using a server with an old version of Python, and you
| don't have root access, I've prepared a bootstrap script to help you
| compile a local Python install. Run:
pre.language-bash: code
| $ curl https://raw.githubusercontent.com/honnibal/spaCy/master/bootstrap_python_env.sh | bash && source .env/bin/activate
+Option("Compile from source", false)
p
| The other way to install the package is to clone the github repository,
| and build it from source. This installs an additional dependency,
| Cython. If you're using Python 2, I also recommend installing fabric
| and fabtools &ndash; this is how I build the project.
pre.language-bash: code
| $ git clone https://github.com/honnibal/spaCy.git
| $ cd spaCy
| $ virtualenv .env && source .env/bin/activate
| $ export PYTHONPATH=`pwd`
| $ pip install -r requirements.txt
| $ python setup.py build_ext --inplace
| $ python -m spacy.en.download
| $ pip install pytest
| $ py.test tests/
p
| Python packaging is awkward at the best of times, and it's particularly tricky
| with C extensions, built via Cython, requiring large data files. So,
| please report issues as you encounter them.
+Option("pypy (Unsupported)")
| If PyPy support is a priority for you, please get in touch. We could likely
| fix the remaining issues, if necessary. However, the library is likely to
| be much slower on PyPy, as it's written in Cython, which produces code tuned
| for the performance of CPython.
+Option("Windows (Unsupported)")
| Unfortunately we don't currently have access to a Windows machine, and have
| no experience developing on a Microsoft stack. In theory the only problems are
| with the installation and packaging &ndash; there should be no deep platform
| dependency. Unfortunately we can't debug these issues at present, simply due
| to lack of a development environment.

View File

@ -1,179 +0,0 @@
extends ./outline.jade
mixin columns(...names)
tr
each name in names
th= name
mixin row(...cells)
tr
each cell in cells
td= cell
mixin LicenseOption(name, period, price, audience)
.item
h4 #{name}
.focus #{period}
span #{price}
h5 Suggested for:
span #{audience}
a.button(href="spacy_trial_free.docx") Download license
span or
a(href="#") get in touch
block body_block
article.pricing
.box.license
+LicenseOption("Trial", "90 days", "$0", "Evaluation")
+LicenseOption("Production", "1 year", "$5,000", "Production")
+LicenseOption("Certainty", "5 years", "$20,000", "Secure Planning")
p.caption
| Researcher, hobbyist, or open-source developer? spaCy also offers
a(href="http://www.gnu.org/licenses/agpl-3.0.en.html") AGPLv3
| licenses.
p.
What we offer is a rare, simple certainty: a long-term, permissive license
that comes with full access to the source, complete transparency, and almost
complete flexibility. The difference between this and a black-box API is
night and day. You cannot build a great product against a service you
don't understand, and you can't build a great business on a service you
don't control.
p
| Let's face it: services disappear. Constantly. The good start-ups get
| bought; the bad ones go bankrupt. Open-source projects become abandoned
| or bloated. Google's graveyard is over-flowing &ndash; ditto for Yahoo!,
| Microsoft, etc. Sure, IBM won't be broke... But will BlueMix be sunset?
p
| A 5 year license won't expire until 2020. spaCy will be with you for
| longer than most of your current staff. If that's still not enough,
| get in touch. I'm sure we can work something out.
//p.
// To make spaCy as valuable as possible, licenses to it are for life. You get
// complete transparency, certainty and control. If you need to use spaCy
// as an API, it's trivial to host it yourself &ndash; and you don't need to
// worry about the service changing or disappearing. And if you're ever in
// acquisition or IPO talks, the story is simple.
//p.
// spaCy can also be used as free open-source software, under the Aferro GPL
// license. If you use it this way, you must comply with the AGPL license
// terms. When you distribute your project, or offer it as a network service,
// you must distribute the source-code and grant users an AGPL license to it.
//h3 Examples
//p.
// In order to clarify how spaCy's license structure might apply to you, I've
// written a few examples, in the form of user-stories.
//details
// summary: h4 Seed stage start-ups
// p.
// Ashley and Casey have an idea for a start-up. To explore their idea, they
// want to build a minimum viable product they can put in front of potential
// users and investors.
// p. They have two options.
// ol
// li
// p.
// <strong>Trial commercial license.</strong> With a simple form, they can
// use spaCy for 90 days, for a nominal fee of $1. They are free to modify
// spaCy, and they will own the copyright to their modifications for the
// duration of the license. After the trial period elapses, they can either
// pay the license fee, stop using spaCy, release their project under the
// AGPL.
//
// li
// p.
// <strong>AGPL.</strong> Casey and Pat can instead use spaCy under the AGPL
// license. However, they must then release any code that statically or
// dynamically links to spaCy under the AGPL as well (e.g. if they import
// the module, or import a module that imports it, etc). They also cannot
// use spaCy as a network resource, by running it as a service --- this is
// the loophole that the "A" part of the AGPL is designed to close.
//
// p.
// Ashley and Casey find the AGPL license unattractive for commercial use.
// They decide to take up the trial commercial license. However, over the
// next 90 days, Ashley has to move house twice, and Casey gets sick. By
// the time the trial expires, they still don't have a demo they can show
// investors. They send an email explaining the situation, and a 90 day extension
// to their trial license is granted.
// p.
// By the time the extension period has elapsed, spaCy has helped them secure
// funding, and they even have a little revenue. They are glad to pay the
// $5,000 commercial license fee.
// p.
// spaCy is now permanently licensed for the product Ashley and Casey are
// developing. They own the copyright to any modifications they make to spaCy,
// but not to the original spaCy code.
// p.
// No additional fees will be due when they hire new developers, run spaCy on
// additional internal servers, etc. If their company is acquired, the license
// will be transferred to the company acquiring them. However, to use spaCy
// in another product, they will have to buy a second license.
// details
// summary: h4 University academics
// p.
// Alex and Sasha are post-doctoral researchers working for a university.
// Part of their funding comes from a grant from Google, but Google will not
// own any part of the work that they produce. Their mission is just to write
// papers.
// p.
// Alex and Sasha find spaCy convenient, so they use it in their system under
// the AGPL. This means that their system must also be released under the
// AGPL, but they're cool with that &ndash; they were going to release their
// code anyway, as it's the only way to ensure their experiments are properly
// repeatable.
// p.
// Alex and Sasha find and fix a few bugs in spaCy. They must release these
// modifications, and they ask that they be accepted into the main spaCy repo.
// In order to do this, they must sign a contributor agreement, ceding their
// copyright. When commercial licenses to spaCy are sold, Alex and Sasha will
// not be able to claim any royalties from their contributions.
// p.
// Later, Alex and Sasha implement new features into spaCy, for another paper.
// The code was quite rushed, and they don't want to take the time to put
// together a proper pull request. They must release their modifications
// under the AGPL, but they are not obliged to contribute it to the spaCy
// repository, or concede their copyright.
// details
// summary: h4 Open Source developers
// p.
// Phuong and Jessie use the open-source software Calibre to manage their
// e-book libraries. They have an idea for a search feature, and they want
// to use spaCy to implement it. Calibre is released under the GPLv3. The
// AGPL has additional restrictions for projects used as a network resource,
// but they don't apply to this project, so Phuong and Jessie can use spaCy
// to improve Calibre. They'll have to release their code, but that was
// always their intention anyway.

View File

@ -1,17 +0,0 @@
mixin Section(title_text, link_name, include_file)
h3: a(name=link_name) #{title_text}
if (link_name == "example-use")
include ./usage_examples.jade
else if (link_name == "online-demo")
include ./online_demo.jade
else if (link_name == "comparisons")
include ./comparisons.jade
else if (link_name == "install")
include ./installation.jade
else if (link_name == "api")
include ./api.jade
else if (link_name == "tutorials")
include ./tutorials.jade
else if (link_name == "spec")
include ./spec.jade

View File

@ -1,18 +0,0 @@
mixin Displacy(sentence, caption_text, height)
- var url = "http://ines.io/displacy/?full=" + sentence.replace(" ", "%20")
.displacy
iframe.displacy(src="displacy/displacy_demo.html" height=height)
a.view-displacy(href=url)
| Interactive Visualizer
p.caption.
#{caption_text}
+Displacy(
"Click the button to see this sentence in displaCy.",
"The best parse-tree visualizer and annotation tool in all the land.",
275
)

View File

@ -1,37 +0,0 @@
- var slogan = "Build Tomorrow's Language Technologies"
- var tag_line = "spaCy &ndash; " + slogan
doctype html
html(lang="en")
head
meta(charset="utf-8")
title!= tag_line
meta(name="description" content="")
meta(name="author" content="Matthew Honnibal")
link(rel="stylesheet" href="css/style.css")
<!--[if lt IE 9]>
script(src="http://html5shiv.googlecode.com/svn/trunk/html5.js")
<![endif]-->
body(id="home" role="document")
header(role="banner")
h1(class="logo")!= tag_line
div(class="slogan")!= slogan
nav(role="navigation")
ul
li: a(href="home.html") Home
li: a(href="docs.html") Docs
li: a(href="license.html") License
li: a(href="blog.html") Blog
main(id="content" role="main")
block intro_block
block body_block
footer(role="contentinfo")
script(src="js/prism.js")
script(src="js/details_polyfill.js")

View File

@ -1,129 +0,0 @@
mixin columns(...names)
tr
each name in names
th= name
mixin row(...cells)
tr
each cell in cells
td= cell
details
summary: h4 Overview
p.
This document describes the target annotations spaCy is trained to predict.
This is currently a work in progress. Please ask questions on the issue tracker,
so that the answers can be integrated here to improve the documentation.
details
summary: h4 Tokenization
p Tokenization standards are based on the OntoNotes 5 corpus.
p.
The tokenizer differs from most by including tokens for significant
whitespace. Any sequence of whitespace characters beyond a single space
(' ') is included as a token. For instance:
pre.language-python
code
| from spacy.en import English
| nlp = English(parse=False)
| tokens = nlp('Some\nspaces and\ttab characters')
| print([t.orth_ for t in tokens])
p Which produces:
pre.language-python
code
| ['Some', '\n', 'spaces', ' ', 'and', '\t', 'tab', 'characters']
p.
The whitespace tokens are useful for much the same reason punctuation is
&ndash; it's often an important delimiter in the text. By preserving
it in the token output, we are able to maintain a simple alignment
between the tokens and the original string, and we ensure that no
information is lost during processing.
details
summary: h4 Sentence boundary detection
p.
Sentence boundaries are calculated from the syntactic parse tree, so
features such as punctuation and capitalisation play an important but
non-decisive role in determining the sentence boundaries. Usually this
means that the sentence boundaries will at least coincide with clause
boundaries, even given poorly punctuated text.
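p.
For example, assuming the <code>doc.sents</code> API shown in the usage
examples:
pre.language-python
code
| from spacy.en import English
| nlp = English()
| doc = nlp(u'Hello, world. Here are two sentences.')
| # Sentence spans are derived from the parse tree
| for sentence in doc.sents:
|     print([w.orth_ for w in sentence])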
details
summary: h4 Part-of-speech Tagging
p.
The part-of-speech tagger uses the OntoNotes 5 version of the Penn Treebank
tag set. We also map the tags to the simpler Google Universal POS Tag set.
p.
Details here: https://github.com/honnibal/spaCy/blob/master/spacy/en/pos.pyx#L124
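p.
A quick sketch of reading both tag sets off a token, using the attributes
shown in the usage examples:
pre.language-python
code
| from spacy.en import English
| nlp = English()
| doc = nlp(u'They told us to duck.')
| for word in doc:
|     # tag_ is the fine-grained Treebank tag, pos_ the coarse universal tag
|     print(word.orth_, word.tag_, word.pos_)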
details
summary: h4 Lemmatization
p.
A "lemma" is the uninflected form of a word. In English, this means:
ul
li Adjectives: The form like "happy", not "happier" or "happiest"
li Adverbs: The form like "badly", not "worse" or "worst"
li Nouns: The form like "dog", not "dogs"; like "child", not "children"
li Verbs: The form like "write", not "writes", "writing", "wrote" or "written"
p.
The lemmatization data is taken from WordNet. However, we also add a
special case for pronouns: all pronouns are lemmatized to the special
token -PRON-.
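p.
For example (a sketch, using the attributes shown in the usage examples):
pre.language-python
code
| from spacy.en import English
| nlp = English()
| doc = nlp(u'We were writing about the children and their dogs.')
| # lemma_ gives the uninflected form; pronouns map to -PRON-
| print([(w.orth_, w.lemma_) for w in doc])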
details
summary: h4 Syntactic Dependency Parsing
p.
The parser is trained on data produced by the ClearNLP converter. Details
of the annotation scheme can be found here: http://www.mathcs.emory.edu/~choi/doc/clear-dependency-2012.pdf
details
summary: h4 Named Entity Recognition
table
thead
+columns("Entity Type", "Description")
tbody
+row("PERSON", "People, including fictional.")
+row("NORP", "Nationalities or religious or political groups.")
+row("FACILITY", "Buildings, airports, highways, bridges, etc.")
+row("ORG", "Companies, agencies, institutions, etc.")
+row("GPE", "Countries, cities, states.")
+row("LOC", "Non-GPE locations, mountain ranges, bodies of water.")
+row("PRODUCT", "Vehicles, weapons, foods, etc. (Not services")
+row("EVENT", "Named hurricanes, battles, wars, sports events, etc.")
+row("WORK_OF_ART", "Titles of books, songs, etc.")
+row("LAW", "Named documents made into laws")
+row("LANGUAGE", "Any named language")
p The following values are also annotated in a style similar to names:
table
thead
+columns("Entity Type", "Description")
tbody
+row("DATE", "Absolute or relative dates or periods")
+row("TIME", "Times smaller than a day")
+row("PERCENT", 'Percentage (including “%”)')
+row("MONEY", "Monetary values, including unit")
+row("QUANTITY", "Measurements, as of weight or distance")
+row("ORDINAL", 'first", "second"')
+row("CARDINAL", "Numerals that do not fall under another type")

View File

@ -1,31 +0,0 @@
doctype html
html(lang='en')
head
meta(charset='utf-8')
title spaCy Blog
meta(name='description', content='')
meta(name='author', content='Matthew Honnibal')
link(rel='stylesheet', href='css/style.css')
//if lt IE 9
script(src='http://html5shiv.googlecode.com/svn/trunk/html5.js')
body#blog(role="document")
header(role='banner')
h1.logo spaCy Blog
.slogan Blog
nav(role="navigation")
ul
li: a(href="home.html") Home
li: a(href="docs.html") Docs
li.active: a(href="blog.html") Blog
li: a(href="license.html") License
main#content(role='main')
block intro_block
block body_block
footer(role='contentinfo')
script(src="js/prism.js")
script(src="js/details_polyfill.js")

View File

@ -1,200 +0,0 @@
doctype html
html(lang='en')
head
meta(charset='utf-8')
title spaCy Blog
meta(name='description', content='')
meta(name='author', content='Matthew Honnibal')
link(rel='stylesheet', href='css/style.css')
//if lt IE 9
script(src='http://html5shiv.googlecode.com/svn/trunk/html5.js')
body#blog
header(role='banner')
h1.logo spaCy Blog
.slogan Blog
main#content(role='main')
article.post
:markdown-it
# Adverbs
Let's say you're developing a proofreading tool, or possibly an IDE for
writers. You're convinced by Stephen King's advice that
[adverbs are not your friend](http://www.brainpickings.org/2013/03/13/stephen-king-on-adverbs/),
so you want to **highlight all adverbs**. We'll use one of the examples
he finds particularly egregious:
pre.language-python
code
| import spacy.en
| >>> from spacy.parts_of_speech import ADV
| >>> # Load the pipeline, and call it with some text.
| >>> nlp = spacy.en.English()
| >>> tokens = nlp(u"Give it back, he pleaded abjectly, its mine.", tag=True, parse=False)
| >>> print u''.join(tok.string.upper() if tok.pos == ADV else tok.string for tok in tokens)
| ‘Give it BACK,’ he pleaded ABJECTLY, ‘it's mine.’
:markdown-it
Easy enough --- but the problem is that we've also highlighted "back".
While "back" is undoubtedly an adverb, we probably don't want to highlight
it. If what we're trying to do is flag dubious stylistic choices, we'll
need to refine our logic. It turns out only a certain type of adverb
is of interest to us.
:markdown-it
There are lots of ways we might do this, depending on just what words
we want to flag. The simplest way to exclude adverbs like "back" and
"not" is by word frequency: these words are much more common than the
prototypical manner adverbs that the style guides are worried about.
:markdown-it
The `Lexeme.prob` and `Token.prob` attributes give a
log probability estimate of the word:
pre.language-python
code
| >>> nlp.vocab[u'back'].prob
| -7.403977394104004
| >>> nlp.vocab[u'not'].prob
| -5.407193660736084
| >>> nlp.vocab[u'quietly'].prob
| -11.07155704498291
:markdown-it
(The probability estimate is based on counts from a 3 billion word corpus,
smoothed using the Simple Good-Turing method.)
So we can easily exclude the N most frequent words in English from our
adverb marker. Let's try N=1000 for now:
pre.language-python
code
| >>> import spacy.en
| >>> from spacy.parts_of_speech import ADV
| >>> nlp = spacy.en.English()
| >>> # Find log probability of Nth most frequent word
| >>> probs = [lex.prob for lex in nlp.vocab]
| >>> probs.sort()
| >>> is_adverb = lambda tok: tok.pos == ADV and tok.prob < probs[-1000]
| >>> tokens = nlp(u"Give it back, he pleaded abjectly, its mine.")
| >>> print u''.join(tok.string.upper() if is_adverb(tok) else tok.string for tok in tokens)
| ‘Give it back,’ he pleaded ABJECTLY, ‘it's mine.’
:markdown-it
There are lots of other ways we could refine the logic, depending on
just what words we want to flag. Let's say we wanted to only flag
adverbs that modified words similar to "pleaded". This is easy to do,
as spaCy loads a vector-space representation for every word (by default,
the vectors produced by Levy and Goldberg (2014)). Naturally, the
vector is provided as a numpy array:
pre.language-python
code
| >>> pleaded = tokens[7]
| >>> pleaded.repvec.shape
| (300,)
| >>> pleaded.repvec[:5]
| array([ 0.04229792, 0.07459262, 0.00820188, -0.02181299, 0.07519238], dtype=float32)
:markdown-it
We want to sort the words in our vocabulary by their similarity to
"pleaded". There are lots of ways to measure the similarity of two
vectors. We'll use the cosine metric:
pre.language-python
code
| >>> from numpy import dot
| >>> from numpy.linalg import norm
| >>> cosine = lambda v1, v2: dot(v1, v2) / (norm(v1) * norm(v2))
| >>> words = [w for w in nlp.vocab if w.has_repvec]
| >>> words.sort(key=lambda w: cosine(w.repvec, pleaded.repvec))
| >>> words.reverse()
| >>> print('1-20', ', '.join(w.orth_ for w in words[0:20]))
| 1-20 pleaded, pled, plead, confessed, interceded, pleads, testified, conspired, motioned, demurred, countersued, remonstrated, begged, apologised, consented, acquiesced, petitioned, quarreled, appealed, pleading
| >>> print('50-60', ', '.join(w.orth_ for w in words[50:60]))
| 50-60 counselled, bragged, backtracked, caucused, refiled, dueled, mused, dissented, yearned, confesses
| >>> print('100-110', ', '.join(w.orth_ for w in words[100:110]))
| 100-110 cabled, ducked, sentenced, perjured, absconded, bargained, overstayed, clerked, confided, sympathizes
| >>> print('1000-1010', ', '.join(w.orth_ for w in words[1000:1010]))
| 1000-1010 scorned, baled, righted, requested, swindled, posited, firebombed, slimed, deferred, sagged
| >>> print('50000-50010', ', '.join(w.orth_ for w in words[50000:50010]))
| 50000-50010, fb, ford, systems, puck, anglers, ik, tabloid, dirty, rims, artists
:markdown-it
As you can see, the similarity model that these vectors give us is excellent
--- we're still getting meaningful results at 1000 words, off a single
prototype! The only problem is that the list really contains two clusters of
words: one associated with the legal meaning of "pleaded", and one for the more
general sense. Sorting out these clusters is an area of active research.
A simple work-around is to average the vectors of several words, and use that
as our target:
pre.language-python
code
| >>> say_verbs = ['pleaded', 'confessed', 'remonstrated', 'begged', 'bragged', 'confided', 'requested']
| >>> say_vector = sum(nlp.vocab[verb].repvec for verb in say_verbs) / len(say_verbs)
| >>> words.sort(key=lambda w: cosine(w.repvec, say_vector))
| >>> words.reverse()
| >>> print('1-20', ', '.join(w.orth_ for w in words[0:20]))
| 1-20 bragged, remonstrated, enquired, demurred, sighed, mused, intimated, retorted, entreated, motioned, ranted, confided, countersued, gestured, implored, interceded, muttered, marvelled, bickered, despaired
| >>> print('50-60', ', '.join(w.orth_ for w in words[50:60]))
| 50-60 flaunted, quarrelled, ingratiated, vouched, agonized, apologised, lunched, joked, chafed, schemed
| >>> print('1000-1010', ', '.join(w.orth_ for w in words[1000:1010]))
| 1000-1010 hoarded, waded, ensnared, clamoring, abided, deploring, shriveled, endeared, rethought, berate
:markdown-it
These definitely look like words that King might scold a writer for attaching
adverbs to. Recall that our original adverb highlighting function looked like
this:
pre.language-python
code
| >>> import spacy.en
| >>> from spacy.parts_of_speech import ADV
| >>> # Load the pipeline, and call it with some text.
| >>> nlp = spacy.en.English()
| >>> tokens = nlp("Give it back, he pleaded abjectly, its mine.",
| tag=True, parse=False)
| >>> print(''.join(tok.string.upper() if tok.pos == ADV else tok.string for tok in tokens))
| ‘Give it BACK,’ he pleaded ABJECTLY, ‘it's mine.’
:markdown-it
We wanted to refine the logic so that only adverbs modifying evocative
verbs of communication, like "pleaded", were highlighted. We've now
built a vector that represents that type of word, so now we can highlight
adverbs based on subtle logic, honing in on adverbs that seem the most
stylistically problematic, given our starting assumptions:
pre.language-python
code
| >>> import numpy
| >>> from numpy import dot
| >>> from numpy.linalg import norm
| >>> import spacy.en
| >>> from spacy.parts_of_speech import ADV, VERB
| >>> cosine = lambda v1, v2: dot(v1, v2) / (norm(v1) * norm(v2))
| >>> def is_bad_adverb(token, target_verb, tol):
| ... if token.pos != ADV:
| ... return False
| ... elif token.head.pos != VERB:
| ... return False
| ... elif cosine(token.head.repvec, target_verb) < tol:
| ... return False
| ... else:
| ... return True
:markdown-it
This example was somewhat contrived --- and, truth be told, I've never
really bought the idea that adverbs were a grave stylistic sin. But
hopefully it got the message across: the state-of-the-art NLP technologies
are very powerful. spaCy gives you easy and efficient access to them,
which lets you build all sorts of useful products and features that
were previously impossible.
footer(role='contentinfo')
script(src='js/prism.js')

View File

@ -1,132 +0,0 @@
doctype html
html(lang='en')
head
meta(charset='utf-8')
title spaCy Blog
meta(name='description', content='')
meta(name='author', content='Matthew Honnibal')
link(rel='stylesheet', href='css/style.css')
//if lt IE 9
script(src='http://html5shiv.googlecode.com/svn/trunk/html5.js')
body#blog
header(role='banner')
h1.logo spaCy Blog
.slogan Blog
main#content(role='main')
section.intro
p
| Example use of the spaCy NLP tools for data exploration.
| Here we will look for reddit comments that describe Google doing something,
| i.e. discuss the company's actions. This is difficult, because other senses of
| "Google" now dominate usage of the word in conversation, particularly references to
| using Google products.
p
| The heuristics used are quick and dirty &ndash; about 5 minutes work.
//| A better approach is to use the word vector of the verb. But, the
// | demo here is just to show what's possible to build up quickly, to
// | start to understand some data.
article.post
header
h2 Syntax-specific Search
.subhead
| by
a(href='#', rel='author') Matthew Honnibal
| on
time(datetime='2015-08-14') August
details
summary: h4 Imports
pre.language-python
code
| from __future__ import unicode_literals
| from __future__ import print_function
| import sys
|
| import plac
| import bz2
| import ujson
| import spacy.en
details
summary: h4 Load the model and iterate over the data
pre.language-python
code
| def main(input_loc):
| nlp = spacy.en.English() # Load the model takes 10-20 seconds.
| for line in bz2.BZ2File(input_loc): # Iterate over the reddit comments from the dump.
| comment_str = ujson.loads(line)['body'] # Parse the json object, and extract the 'body' attribute.
|
details
summary: h4 Apply the spaCy NLP pipeline, and look for the cases we want
pre.language-python
code
| comment_parse = nlp(comment_str)
| for word in comment_parse:
| if google_doing_something(word):
| # Print the clause
| print(''.join(w.string for w in word.head.subtree).strip())
details
summary: h4 Define the filter function
pre.language-python
code
|
| def google_doing_something(w):
| if w.lower_ != 'google':
| return False
| # Is it the subject of a verb?
| elif w.dep_ != 'nsubj':
| return False
| # And not 'is'
| elif w.head.lemma_ == 'be' and w.head.dep_ != 'aux':
| return False
| # Exclude e.g. "Google says..."
| elif w.head.lemma_ in ('say', 'show'):
| return False
| else:
| return True
|
|
details
summary: h4 Call main
pre.language-python
code
| if __name__ == '__main__':
| plac.call(main)
details
summary: h4 Example output
p.
Many false positives remain. Some are from incorrect interpretations
of the sentence by spaCy, some are flaws in our filtering logic. But
the results are vastly better than a string-based search, which returns
almost no examples of the pattern we're looking for.
code
| Google dropped support for Android < 4.0 already
| google drive
| Google to enforce a little more uniformity in its hardware so that we can see a better 3rd party market for things like mounts, cases, etc
| When Google responds
| Google translate cyka pasterino.
| A quick google looks like Synology does have a sync'ing feature which does support block level so that should work
| (google came up with some weird One Piece/FairyTail crossover stuff), and is their knowledge universally infallible?
| Until you have the gear, google some videos on best farming runs on each planet, you can get a lot REAL fast with the right loop.
| Google offers something like this already, but it is truly terrible.
| google isn't helping me
| Google tells me: 0 results, 250 pages removed from google.
| how did Google swoop in and eat our lunch
script(src="js/prism.js")
script(src="js/details_polyfill.js")

View File

@ -1,204 +0,0 @@
doctype html
html(lang='en')
head
meta(charset='utf-8')
title spaCy Blog
meta(name='description', content='')
meta(name='author', content='Matthew Honnibal')
link(rel='stylesheet', href='css/style.css')
//if lt IE 9
script(src='http://html5shiv.googlecode.com/svn/trunk/html5.js')
body#blog
header(role='banner')
h1.logo spaCy Blog
.slogan Blog
main#content(role='main')
article.post
header
h2 Finding Relevant Tweets
.subhead
| by
a(href='#', rel='author') Matthew Honnibal
| on
time(datetime='2015-08-14') December
details
summary: h4 Imports
pre.language-python
| from __future__ import unicode_literals, print_function
| import plac
| import codecs
| import sys
| import math
|
| import spacy.en
| from spacy.parts_of_speech import VERB, NOUN, ADV, ADJ
|
| from termcolor import colored
| from twython import TwythonStreamer
|
| from os import path
| from math import sqrt
|
| from numpy import dot
| from numpy.linalg import norm
|
|
details
summary: h4 Simple vector-averaging similarity
pre.language-python: code
| class Meaning(object):
| def __init__(self, vectors):
| if vectors:
| self.vector = sum(vectors) / len(vectors)
| self.norm = norm(self.vector)
| else:
| self.vector = None
| self.norm = 0
|
| @classmethod
| def from_path(cls, nlp, loc):
| with codecs.open(loc, 'r', 'utf8') as file_:
| terms = file_.read().strip().split()
| return cls.from_terms(nlp, terms)
|
| @classmethod
| def from_tokens(cls, nlp, tokens):
| vectors = [t.repvec for t in tokens]
| return cls(vectors)
|
| @classmethod
| def from_terms(cls, nlp, examples):
| lexemes = [nlp.vocab[eg] for eg in examples]
| vectors = [eg.repvec for eg in lexemes]
| return cls(vectors)
|
| def similarity(self, other):
| if not self.norm or not other.norm:
| return -1
| return dot(self.vector, other.vector) / (self.norm * other.norm)
|
details
summary: h4 Print matches
pre.language-python: code
|
| def print_colored(model, stream=sys.stdout):
| if model['is_match']:
| color = 'green'
| elif model['is_reject']:
| color = 'red'
| else:
| color = 'grey'
|
| if not model['is_rare'] and model['is_match'] and not model['is_reject']:
| match_score = colored('%.3f' % model['match_score'], 'green')
| reject_score = colored('%.3f' % model['reject_score'], 'red')
| prob = '%.5f' % model['prob']
|
| print(match_score, reject_score, prob)
| print(repr(model['text']), color)
| print('')
|
|
details
summary: h4 TextMatcher: Process the tweets using spaCy
pre.language-python: code
| class TextMatcher(object):
| def __init__(self, nlp, get_target, get_reject, min_prob, min_match, max_reject):
| self.nlp = nlp
| self.get_target = get_target
| self.get_reject = get_reject
| self.min_prob = min_prob
| self.min_match = min_match
| self.max_reject = max_reject
|
| def __call__(self, text):
| tweet = self.nlp(text)
| target_terms = self.get_target()
| reject_terms = self.get_reject()
|
| prob = sum(math.exp(w.prob) for w in tweet) / len(tweet)
| meaning = Meaning.from_tokens(self.nlp, tweet)
|
| match_score = meaning.similarity(self.get_target())
| reject_score = meaning.similarity(self.get_reject())
| return {
| 'text': tweet.string,
| 'prob': prob,
| 'match_score': match_score,
| 'reject_score': reject_score,
| 'is_rare': prob < self.min_prob,
| 'is_match': prob >= self.min_prob and match_score >= self.min_match,
| 'is_reject': prob >= self.min_prob and reject_score >= self.max_reject
| }
|
|
details
summary: h4 Connect to Twitter and stream tweets
pre.language-python: code
| class Connection(TwythonStreamer):
| def __init__(self, keys_dir, handler, view):
| keys = Secrets(keys_dir)
| TwythonStreamer.__init__(self, keys.key, keys.secret, keys.token, keys.token_secret)
| self.handler = handler
| self.view = view
|
| def on_success(self, data):
| text = data.get('text', u'')
| # Twython returns either bytes or unicode, depending on tweet.
| # #APIshaming
| try:
| model = self.handler(text)
| except TypeError:
| model = self.handler(text.decode('utf8'))
| status = self.view(model, sys.stdin)
|
| def on_error(self, status_code, data):
| print(status_code)
|
|
| class Secrets(object):
| def __init__(self, key_dir):
| self.key = open(path.join(key_dir, 'key.txt')).read().strip()
| self.secret = open(path.join(key_dir, 'secret.txt')).read().strip()
| self.token = open(path.join(key_dir, 'token.txt')).read().strip()
| self.token_secret = open(path.join(key_dir, 'token_secret.txt')).read().strip()
|
|
details
summary: h4 Command-line interface
pre.language-python: code
| def main(keys_dir, term, target_loc, reject_loc, min_prob=-20, min_match=0.8, max_reject=0.5):
| # We don't need the parser for this demo, so may as well save the loading time
| nlp = spacy.en.English(Parser=None)
| get_target = lambda: Meaning.from_path(nlp, target_loc)
| get_reject = lambda: Meaning.from_path(nlp, reject_loc)
| matcher = TextMatcher(nlp, get_target, get_reject, min_prob, min_match, max_reject)
|
| twitter = Connection(keys_dir, matcher, print_colored)
| twitter.statuses.filter(track=term)
|
|
| if __name__ == '__main__':
| plac.call(main)
|
footer(role='contentinfo')
script(src='js/prism.js')

View File

@ -1,29 +0,0 @@
mixin Tutorial(title)
details
summary
h4= title
block
+Tutorial("Mark-up all manner adverbs, especially for verbs of speech")
| Let's say you're developing a proofreading tool, or possibly an IDE for
| writers. You're convinced by Stephen King's advice that
| adverbs are not your friend
| so you want to
a.readmore(href='tute_adverbs.html')
| highlight all adverbs. ►
+Tutorial("Search Reddit for comments about Google doing something")
| Example use of the spaCy NLP tools for data exploration.
| Here we will look for Reddit comments that describe Google doing something,
| i.e. discuss the company's actions. This is difficult, because other
| senses of "Google" now dominate usage of the word in conversation,
| particularly references to using Google products.
a.readmore(href='tute_adverbs.html')
| ►
+Tutorial("Use word vectors for semantic search of Twitter")
| Stream tweets from the Twitter API, and use spaCy's word vectors to find
| messages that are semantically similar to a set of example terms.
a.readmore(href='tute_twitter.html')
| ►

View File

@ -1,167 +0,0 @@
mixin example(name)
details
summary
h4= name
block
+example("Load resources and process text")
pre.language-python: code
| from __future__ import unicode_literals, print_function
| from spacy.en import English
| nlp = English()
| doc = nlp('Hello, world. Here are two sentences.')
+example("Get tokens and sentences")
pre.language-python: code
| token = doc[0]
| sentence = doc.sents[0]
| assert token is sentence[0]
+example("Use integer IDs for any string")
pre.language-python: code
| hello_id = nlp.vocab.strings['Hello']
| hello_str = nlp.vocab.strings[hello_id]
|
| assert token.orth == hello_id == 52
| assert token.orth_ == hello_str == 'Hello'
+example("Get and set string views and flags")
pre.language-python: code
| assert token.shape_ == 'Xxxx'
| for lexeme in nlp.vocab:
| if lexeme.is_alpha:
| lexeme.shape_ = 'W'
| elif lexeme.is_digit:
| lexeme.shape_ = 'D'
| elif lexeme.is_punct:
| lexeme.shape_ = 'P'
| else:
| lexeme.shape_ = 'M'
| assert token.shape_ == 'W'
+example("Export to numpy arrays")
pre.language-python: code
| from spacy.en.attrs import ORTH, LIKE_URL, IS_OOV
|
| attr_ids = [ORTH, LIKE_URL, IS_OOV]
| doc_array = doc.to_array(attr_ids)
| assert doc_array.shape == (len(doc), len(attr_ids))
| assert doc[0].orth == doc_array[0, 0]
| assert doc[1].orth == doc_array[1, 0]
| assert doc[0].like_url == doc_array[0, 1]
| assert list(doc_array[:, 1]) == [t.like_url for t in doc]
+example("Word vectors")
pre.language-python: code
| doc = nlp("Apples and oranges are similar. Boots and hippos aren't.")
|
| apples = doc[0]
| oranges = doc[1]
| boots = doc[6]
| hippos = doc[8]
|
| assert apples.similarity(oranges) > boots.similarity(hippos)
+example("Part-of-speech tags")
pre.language-python: code
| from spacy.parts_of_speech import ADV
|
| def is_adverb(token):
| return token.pos == spacy.parts_of_speech.ADV
|
| # These are data-specific, so no constants are provided. You have to look
| # up the IDs from the StringStore.
| NNS = nlp.vocab.strings['NNS']
| NNPS = nlp.vocab.strings['NNPS']
| def is_plural_noun(token):
| return token.tag == NNS or token.tag == NNPS
|
| def print_coarse_pos(token):
| print(token.pos_)
|
| def print_fine_pos(token):
| print(token.tag_)
+example("Syntactic dependencies")
pre.language-python: code
| def dependency_labels_to_root(token):
| '''Walk up the syntactic tree, collecting the arc labels.'''
| dep_labels = []
| while token.root is not token:
| dep_labels.append(token.dep)
| token = token.head
| return dep_labels
+example("Named entities")
pre.language-python: code
| def iter_products(docs):
| for doc in docs:
| for ent in doc.ents:
| if ent.label_ == 'PRODUCT':
| yield ent
|
| def word_is_in_entity(word):
| return word.ent_type != 0
|
| def count_parent_verb_by_person(docs):
| counts = defaultdict(lambda: defaultdict(int))
| for doc in docs:
| for ent in doc.ents:
| if ent.label_ == 'PERSON' and ent.root.head.pos == VERB:
| counts[ent.orth_][ent.root.head.lemma_] += 1
| return counts
//+example("Define custom NER rules")
// pre.language-python: code
// | nlp.matcher
+example("Calculate inline mark-up on original string")
pre.language-python: code
| def put_spans_around_tokens(doc, get_classes):
| '''Given some function to compute class names, put each token in a
| span element, with the appropriate classes computed.
|
| All whitespace is preserved, outside of the spans. (Yes, I know HTML
| won't display it. But the point is no information is lost, so you can
| calculate what you need, e.g. <br /> tags, <p> tags, etc.)
| '''
| output = []
| template = '&lt;span classes="{classes}"&gt;{word}&lt;/span&gt;{space}'
| for token in doc:
| if token.is_space:
| output.append(token.orth_)
| else:
| output.append(
| template.format(
| classes=' '.join(get_classes(token)),
| word=token.orth_,
| space=token.whitespace_))
| string = ''.join(output)
| string = string.replace('\n', '<br />')
| string = string.replace('\t', '&nbsp;&nbsp;&nbsp;&nbsp;')
| return string
+example("Efficient binary serialization")
pre.language-python: code
|
| byte_string = doc.as_bytes()
| open('/tmp/moby_dick.bin', 'wb').write(byte_string)
|
| nlp = spacy.en.English()
| for byte_string in Doc.read(open('/tmp/moby_dick.bin', 'rb')):
| doc = Doc(nlp.vocab)
| doc.from_bytes(byte_string)
p
| See the
a(href="docs.html") docs page
| for
a(href="docs.html#api") API documentation,
a(href="docs.html#tutorials") tutorials,
| and
a(href="docs.html#spec") annotation specs.