From 721f4554c8671c991aa9a25684bd5dc6877f1b1a Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Thu, 2 Sep 2021 09:26:33 +0200 Subject: [PATCH] matcher doc corrections (#9115) * update error message to current UX * clarify uppercase effect * fix docstring --- spacy/matcher/matcher.pyx | 2 +- website/docs/usage/rule-based-matching.md | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 555766f62..be45dcaad 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -340,7 +340,7 @@ cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, e The "predicates" list contains functions that take a Python list and return a boolean value. It's mostly used for regular expressions. - The "extra_getters" list contains functions that take a Python list and return + The "extensions" list contains functions that take a Python list and return an attr ID. It's mostly used for extension attributes. """ cdef vector[PatternStateC] states diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md index 81c838584..74bb10304 100644 --- a/website/docs/usage/rule-based-matching.md +++ b/website/docs/usage/rule-based-matching.md @@ -429,7 +429,7 @@ matcher.add("HelloWorld", [pattern]) # 🚨 Raises an error: # MatchPatternError: Invalid token patterns for matcher rule 'HelloWorld' # Pattern 0: -# - Additional properties are not allowed ('CASEINSENSITIVE' was unexpected) [2] +# - [pattern -> 2 -> CASEINSENSITIVE] extra fields not permitted ``` @@ -438,7 +438,8 @@ matcher.add("HelloWorld", [pattern]) To move on to a more realistic example, let's say you're working with a large corpus of blog articles, and you want to match all mentions of "Google I/O" (which spaCy tokenizes as `['Google', 'I', '/', 'O'`]). To be safe, you only -match on the uppercase versions, in case someone has written it as "Google i/o". +match on the uppercase versions, avoiding matches with phrases such as "Google +i/o". ```python ### {executable="true"}