From 697bec764de41e39582caadc14608607c2af8d09 Mon Sep 17 00:00:00 2001
From: adrianeboyd
Date: Tue, 3 Mar 2020 12:22:39 +0100
Subject: [PATCH] Normalize IS_SENT_START to SENT_START for Matcher (#5080)

---
 spacy/matcher/_schemas.py                      | 4 ++++
 spacy/matcher/matcher.pyx                      | 2 ++
 spacy/tests/matcher/test_pattern_validation.py | 2 ++
 3 files changed, 8 insertions(+)

diff --git a/spacy/matcher/_schemas.py b/spacy/matcher/_schemas.py
index 1b10f0dd5..4ef7ae49a 100644
--- a/spacy/matcher/_schemas.py
+++ b/spacy/matcher/_schemas.py
@@ -170,6 +170,10 @@ TOKEN_PATTERN_SCHEMA = {
                 "title": "Token is the first in a sentence",
                 "$ref": "#/definitions/boolean_value",
             },
+            "SENT_START": {
+                "title": "Token is the first in a sentence",
+                "$ref": "#/definitions/boolean_value",
+            },
             "LIKE_NUM": {
                 "title": "Token resembles a number",
                 "$ref": "#/definitions/boolean_value",
diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx
index 30ef3dd36..11461afb8 100644
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@@ -670,6 +670,8 @@ def _get_attr_values(spec, string_store):
                 continue
             if attr == "TEXT":
                 attr = "ORTH"
+            if attr == "IS_SENT_START":
+                attr = "SENT_START"
             if attr not in TOKEN_PATTERN_SCHEMA["items"]["properties"]:
                 raise ValueError(Errors.E152.format(attr=attr))
             attr = IDS.get(attr)
diff --git a/spacy/tests/matcher/test_pattern_validation.py b/spacy/tests/matcher/test_pattern_validation.py
index 2db2f9eb3..c536698d0 100644
--- a/spacy/tests/matcher/test_pattern_validation.py
+++ b/spacy/tests/matcher/test_pattern_validation.py
@@ -34,6 +34,8 @@ TEST_PATTERNS = [
     ([{"LOWER": {"REGEX": "^X", "NOT_IN": ["XXX", "XY"]}}], 0, 0),
     ([{"NORM": "a"}, {"POS": {"IN": ["NOUN"]}}], 0, 0),
     ([{"_": {"foo": {"NOT_IN": ["bar", "baz"]}, "a": 5, "b": {">": 10}}}], 0, 0),
+    ([{"IS_SENT_START": True}], 0, 0),
+    ([{"SENT_START": True}], 0, 0),
 ]
 
 XFAIL_TEST_PATTERNS = [([{"orth": "foo"}], 0, 0)]