diff --git a/website/docs/usage/rule-based-matching.jade b/website/docs/usage/rule-based-matching.jade
index c9cbc13d9..41fa0bdd3 100644
--- a/website/docs/usage/rule-based-matching.jade
+++ b/website/docs/usage/rule-based-matching.jade
@@ -92,6 +92,7 @@ p
     | return a 4-tuple #[code (ent_id, label, start, end)].
 
 +code.
+    from spacy.tokens.doc import Doc
     def trim_title(doc, ent_id, label, start, end):
         if doc[start].check_flag(IS_TITLE_TERM):
             return (ent_id, label, start+1, end)
@@ -100,8 +101,8 @@ p
     titles = set(title.lower() for title in [u'Mr.', 'Dr.', 'Ms.', u'Admiral'])
     IS_TITLE_TERM = matcher.vocab.add_flag(lambda string: string.lower() in titles)
     matcher.add_entity('PersonName', acceptor=trim_title)
-    matcher.add_pattern('PersonName', {LOWER: 'mr.'}, {LOWER: 'cruise'}])
-    matcher.add_pattern('PersonName', {LOWER: 'dr.'}, {LOWER: 'seuss'}])
+    matcher.add_pattern('PersonName', [{LOWER: 'mr.'}, {LOWER: 'cruise'}])
+    matcher.add_pattern('PersonName', [{LOWER: 'dr.'}, {LOWER: 'seuss'}])
     doc = Doc(matcher.vocab, words=[u'Mr.', u'Cruise', u'likes', 'Dr.', u'Seuss'])
     for ent_id, label, start, end in matcher(doc):
         print(doc[start:end].text)