Normalize IS_SENT_START to SENT_START for Matcher (#5080)

This commit is contained in:
adrianeboyd 2020-03-03 12:22:39 +01:00 committed by GitHub
parent 2281c4708c
commit 697bec764d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 8 additions and 0 deletions

View File

@@ -170,6 +170,10 @@ TOKEN_PATTERN_SCHEMA = {
"title": "Token is the first in a sentence", "title": "Token is the first in a sentence",
"$ref": "#/definitions/boolean_value", "$ref": "#/definitions/boolean_value",
}, },
"SENT_START": {
"title": "Token is the first in a sentence",
"$ref": "#/definitions/boolean_value",
},
"LIKE_NUM": { "LIKE_NUM": {
"title": "Token resembles a number", "title": "Token resembles a number",
"$ref": "#/definitions/boolean_value", "$ref": "#/definitions/boolean_value",

View File

@@ -670,6 +670,8 @@ def _get_attr_values(spec, string_store):
continue continue
if attr == "TEXT": if attr == "TEXT":
attr = "ORTH" attr = "ORTH"
if attr == "IS_SENT_START":
attr = "SENT_START"
if attr not in TOKEN_PATTERN_SCHEMA["items"]["properties"]: if attr not in TOKEN_PATTERN_SCHEMA["items"]["properties"]:
raise ValueError(Errors.E152.format(attr=attr)) raise ValueError(Errors.E152.format(attr=attr))
attr = IDS.get(attr) attr = IDS.get(attr)

View File

@@ -34,6 +34,8 @@ TEST_PATTERNS = [
([{"LOWER": {"REGEX": "^X", "NOT_IN": ["XXX", "XY"]}}], 0, 0), ([{"LOWER": {"REGEX": "^X", "NOT_IN": ["XXX", "XY"]}}], 0, 0),
([{"NORM": "a"}, {"POS": {"IN": ["NOUN"]}}], 0, 0), ([{"NORM": "a"}, {"POS": {"IN": ["NOUN"]}}], 0, 0),
([{"_": {"foo": {"NOT_IN": ["bar", "baz"]}, "a": 5, "b": {">": 10}}}], 0, 0), ([{"_": {"foo": {"NOT_IN": ["bar", "baz"]}, "a": 5, "b": {">": 10}}}], 0, 0),
([{"IS_SENT_START": True}], 0, 0),
([{"SENT_START": True}], 0, 0),
] ]
XFAIL_TEST_PATTERNS = [([{"orth": "foo"}], 0, 0)] XFAIL_TEST_PATTERNS = [([{"orth": "foo"}], 0, 0)]