diff --git a/README.rst b/README.rst index 389b4ea14..8a97d6de2 100644 --- a/README.rst +++ b/README.rst @@ -179,6 +179,11 @@ Install a version of Visual Studio Express or higher that matches the version that was used to compile your Python interpreter. For official distributions these are VS 2008 (Python 2.7), VS 2010 (Python 3.4) and VS 2015 (Python 3.5). +If you don't want to install all of Visual Studio, you can install a +stand-alone compiler instead. Make sure that the compiler version matches +your version of Python. See https://wiki.python.org/moin/WindowsCompilers for +download links to the stand-alone compilers. + Run tests ========= diff --git a/website/docs/tutorials/rule-based-matcher.jade b/website/docs/tutorials/rule-based-matcher.jade index 8c8949631..900a86a63 100644 --- a/website/docs/tutorials/rule-based-matcher.jade +++ b/website/docs/tutorials/rule-based-matcher.jade @@ -9,6 +9,18 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens. nlp = spacy.load('en', parser=False, entity=False) + def merge_phrases(matcher, doc, i, matches): + ''' + Merge a phrase. We have to be careful here because we'll change the token indices. + To avoid problems, merge all the phrases once we're called on the last match. + ''' + if i != len(matches)-1: + return None + # Get Span objects + spans = [(ent_id, label, doc[start : end]) for ent_id, label, start, end in matches] + for ent_id, label, span in spans: + span.merge(label=label, tag='NNP' if label else span.root.tag_) + matcher = Matcher(nlp.vocab) matcher.add_entity( @@ -17,6 +29,7 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens. acceptor=None, # Accept or modify the match on_match=merge_phrases # Callback to act on the matches ) + matcher.add_pattern( "GoogleNow", # Entity ID -- Created if doesn't exist. [ # The pattern is a list of *Token Specifiers*. @@ -32,7 +45,7 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens. 
doc = nlp(u"I prefer Siri to Google Now.") matches = matcher(doc) for ent_id, label, start, end in matches: - print(nlp.strings[ent_id], nlp.strings[label], doc[start : end].text) + print(nlp.vocab.strings[ent_id], nlp.vocab.strings[label], doc[start : end].text) entity = matcher.get_entity(ent_id) print(entity)