Merge pull request #622 from savkov/patch-2

Added missing brackets & suggested import statement
This commit is contained in:
Matthew Honnibal 2016-11-12 04:21:04 +11:00 committed by GitHub
commit f27db1d9b5

View File

@ -92,6 +92,7 @@ p
| return a 4-tuple #[code (ent_id, label, start, end)].
+code.
from spacy.tokens.doc import Doc
def trim_title(doc, ent_id, label, start, end):
if doc[start].check_flag(IS_TITLE_TERM):
return (ent_id, label, start+1, end)
@ -100,8 +101,8 @@ p
titles = set(title.lower() for title in [u'Mr.', 'Dr.', 'Ms.', u'Admiral'])
IS_TITLE_TERM = matcher.vocab.add_flag(lambda string: string.lower() in titles)
matcher.add_entity('PersonName', acceptor=trim_title)
matcher.add_pattern('PersonName', {LOWER: 'mr.'}, {LOWER: 'cruise'}])
matcher.add_pattern('PersonName', {LOWER: 'dr.'}, {LOWER: 'seuss'}])
matcher.add_pattern('PersonName', [{LOWER: 'mr.'}, {LOWER: 'cruise'}])
matcher.add_pattern('PersonName', [{LOWER: 'dr.'}, {LOWER: 'seuss'}])
doc = Doc(matcher.vocab, words=[u'Mr.', u'Cruise', u'likes', 'Dr.', u'Seuss'])
for ent_id, label, start, end in matcher(doc):
print(doc[start:end].text)