Use nlp.make_doc instead of nlp for patterns [ci skip]

This commit is contained in:
Ines Montani 2018-12-08 11:56:01 +01:00
parent 7dd21b66d5
commit 8c0f0f50bc
3 changed files with 4 additions and 3 deletions

View File

@@ -206,7 +206,8 @@ p
nlp = spacy.load('en_core_web_sm') nlp = spacy.load('en_core_web_sm')
matcher = PhraseMatcher(nlp.vocab) matcher = PhraseMatcher(nlp.vocab)
terminology_list = ['Barack Obama', 'Angela Merkel', 'Washington, D.C.'] terminology_list = ['Barack Obama', 'Angela Merkel', 'Washington, D.C.']
patterns = [nlp(text) for text in terminology_list] # Only run nlp.make_doc to speed things up
patterns = [nlp.make_doc(text) for text in terminology_list]
matcher.add('TerminologyList', None, *patterns) matcher.add('TerminologyList', None, *patterns)
doc = nlp(u"German Chancellor Angela Merkel and US President Barack Obama " doc = nlp(u"German Chancellor Angela Merkel and US President Barack Obama "

View File

@@ -72,7 +72,7 @@ p
name = 'entity_matcher' name = 'entity_matcher'
def __init__(self, nlp, terms, label): def __init__(self, nlp, terms, label):
patterns = [nlp(text) for text in terms] patterns = [nlp.make_doc(text) for text in terms]
self.matcher = PhraseMatcher(nlp.vocab) self.matcher = PhraseMatcher(nlp.vocab)
self.matcher.add(label, None, *patterns) self.matcher.add(label, None, *patterns)

View File

@@ -240,7 +240,7 @@ p
+code-new. +code-new.
from spacy.matcher import PhraseMatcher from spacy.matcher import PhraseMatcher
matcher = PhraseMatcher(nlp.vocab) matcher = PhraseMatcher(nlp.vocab)
patterns = [nlp(text) for text in large_terminology_list] patterns = [nlp.make_doc(text) for text in large_terminology_list]
matcher.add('PRODUCT', None, *patterns) matcher.add('PRODUCT', None, *patterns)
+code-old. +code-old.