mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Merge branch 'master' of ssh://github.com/explosion/spaCy
This commit is contained in:
commit
7638f439e5
|
@ -179,6 +179,11 @@ Install a version of Visual Studio Express or higher that matches the version
|
||||||
that was used to compile your Python interpreter. For official distributions
|
that was used to compile your Python interpreter. For official distributions
|
||||||
these are VS 2008 (Python 2.7), VS 2010 (Python 3.4) and VS 2015 (Python 3.5).
|
these are VS 2008 (Python 2.7), VS 2010 (Python 3.4) and VS 2015 (Python 3.5).
|
||||||
|
|
||||||
|
If you don't want to install the entire Visual Studio, you can install a
|
||||||
|
stand-alone compiler. Make sure that you install the correct version for
|
||||||
|
your version of Python. See https://wiki.python.org/moin/WindowsCompilers for
|
||||||
|
links to download these.
|
||||||
|
|
||||||
Run tests
|
Run tests
|
||||||
=========
|
=========
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,18 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens.
|
||||||
|
|
||||||
nlp = spacy.load('en', parser=False, entity=False)
|
nlp = spacy.load('en', parser=False, entity=False)
|
||||||
|
|
||||||
|
def merge_phrases(matcher, doc, i, matches):
|
||||||
|
'''
|
||||||
|
Merge a phrase. We have to be careful here because we'll change the token indices.
|
||||||
|
To avoid problems, merge all the phrases once we're called on the last match.
|
||||||
|
'''
|
||||||
|
if i != len(matches)-1:
|
||||||
|
return None
|
||||||
|
# Get Span objects
|
||||||
|
spans = [(ent_id, label, doc[start : end]) for ent_id, label, start, end in matches]
|
||||||
|
for ent_id, label, span in spans:
|
||||||
|
span.merge(label=label, tag='NNP' if label else span.root.tag_)
|
||||||
|
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
|
|
||||||
matcher.add_entity(
|
matcher.add_entity(
|
||||||
|
@ -17,6 +29,7 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens.
|
||||||
acceptor=None, # Accept or modify the match
|
acceptor=None, # Accept or modify the match
|
||||||
on_match=merge_phrases # Callback to act on the matches
|
on_match=merge_phrases # Callback to act on the matches
|
||||||
)
|
)
|
||||||
|
|
||||||
matcher.add_pattern(
|
matcher.add_pattern(
|
||||||
"GoogleNow", # Entity ID -- Created if it doesn't exist.
|
"GoogleNow", # Entity ID -- Created if it doesn't exist.
|
||||||
[ # The pattern is a list of *Token Specifiers*.
|
[ # The pattern is a list of *Token Specifiers*.
|
||||||
|
@ -32,7 +45,7 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens.
|
||||||
doc = nlp(u"I prefer Siri to Google Now.")
|
doc = nlp(u"I prefer Siri to Google Now.")
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
for ent_id, label, start, end in matches:
|
for ent_id, label, start, end in matches:
|
||||||
print(nlp.strings[ent_id], nlp.strings[label], doc[start : end].text)
|
print(nlp.vocab.strings[ent_id], nlp.vocab.strings[label], doc[start : end].text)
|
||||||
entity = matcher.get_entity(ent_id)
|
entity = matcher.get_entity(ent_id)
|
||||||
print(entity)
|
print(entity)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user