From a8831a85e49c1f7777b4ded60c199ca73a503caf Mon Sep 17 00:00:00 2001 From: Sasho Savkov Date: Fri, 11 Nov 2016 17:12:56 +0000 Subject: [PATCH] Added missing brackets & suggested import statement There are two missing brackets on the `add_pattern` lines. I also suggest you include the `from spacy.tokens.doc import Doc` statement to make it easy for people to copy and paste a working example. --- website/docs/usage/rule-based-matching.jade | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/website/docs/usage/rule-based-matching.jade b/website/docs/usage/rule-based-matching.jade index c9cbc13d9..41fa0bdd3 100644 --- a/website/docs/usage/rule-based-matching.jade +++ b/website/docs/usage/rule-based-matching.jade @@ -92,6 +92,7 @@ p | return a 4-tuple #[code (ent_id, label, start, end)]. +code. + from spacy.tokens.doc import Doc def trim_title(doc, ent_id, label, start, end): if doc[start].check_flag(IS_TITLE_TERM): return (ent_id, label, start+1, end) @@ -100,8 +101,8 @@ p titles = set(title.lower() for title in [u'Mr.', 'Dr.', 'Ms.', u'Admiral']) IS_TITLE_TERM = matcher.vocab.add_flag(lambda string: string.lower() in titles) matcher.add_entity('PersonName', acceptor=trim_title) - matcher.add_pattern('PersonName', {LOWER: 'mr.'}, {LOWER: 'cruise'}]) - matcher.add_pattern('PersonName', {LOWER: 'dr.'}, {LOWER: 'seuss'}]) + matcher.add_pattern('PersonName', [{LOWER: 'mr.'}, {LOWER: 'cruise'}]) + matcher.add_pattern('PersonName', [{LOWER: 'dr.'}, {LOWER: 'seuss'}]) doc = Doc(matcher.vocab, words=[u'Mr.', u'Cruise', u'likes', 'Dr.', u'Seuss']) for ent_id, label, start, end in matcher(doc): print(doc[start:end].text)