diff --git a/spacy/schemas.py b/spacy/schemas.py index be8db6a99..59af53301 100644 --- a/spacy/schemas.py +++ b/spacy/schemas.py @@ -57,12 +57,13 @@ def validate_token_pattern(obj: list) -> List[str]: class TokenPatternString(BaseModel): - REGEX: Optional[StrictStr] - IN: Optional[List[StrictStr]] - NOT_IN: Optional[List[StrictStr]] + REGEX: Optional[StrictStr] = Field(None, alias="regex") + IN: Optional[List[StrictStr]] = Field(None, alias="in") + NOT_IN: Optional[List[StrictStr]] = Field(None, alias="not_in") class Config: extra = "forbid" + allow_population_by_field_name = True # allow alias and field name @validator("*", pre=True, each_item=True, allow_reuse=True) def raise_for_none(cls, v): @@ -72,9 +73,9 @@ class TokenPatternString(BaseModel): class TokenPatternNumber(BaseModel): - REGEX: Optional[StrictStr] = None - IN: Optional[List[StrictInt]] = None - NOT_IN: Optional[List[StrictInt]] = None + REGEX: Optional[StrictStr] = Field(None, alias="regex") + IN: Optional[List[StrictInt]] = Field(None, alias="in") + NOT_IN: Optional[List[StrictInt]] = Field(None, alias="not_in") EQ: Union[StrictInt, StrictFloat] = Field(None, alias="==") NEQ: Union[StrictInt, StrictFloat] = Field(None, alias="!=") GEQ: Union[StrictInt, StrictFloat] = Field(None, alias=">=") @@ -84,6 +85,7 @@ class TokenPatternNumber(BaseModel): class Config: extra = "forbid" + allow_population_by_field_name = True # allow alias and field name @validator("*", pre=True, each_item=True, allow_reuse=True) def raise_for_none(cls, v): diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py index 3cc6bad75..e17199a08 100644 --- a/spacy/tests/conftest.py +++ b/spacy/tests/conftest.py @@ -46,7 +46,7 @@ def ca_tokenizer(): @pytest.fixture(scope="session") def cs_tokenizer(): - return get_lang_class("cs").Defaults.create_tokenizer() + return get_lang_class("cs")().tokenizer @pytest.fixture(scope="session") @@ -211,7 +211,7 @@ def ru_lemmatizer(): @pytest.fixture(scope="session") def sa_tokenizer(): - return get_lang_class("sa").Defaults.create_tokenizer() + return get_lang_class("sa")().tokenizer @pytest.fixture(scope="session") diff --git a/spacy/tests/matcher/test_pattern_validation.py b/spacy/tests/matcher/test_pattern_validation.py index 79300374e..4d21aea81 100644 --- a/spacy/tests/matcher/test_pattern_validation.py +++ b/spacy/tests/matcher/test_pattern_validation.py @@ -61,10 +61,10 @@ def test_minimal_pattern_validation(en_vocab, pattern, n_errors, n_min_errors): matcher.add("TEST", [pattern]) -def test_pattern_warnings(en_vocab): +def test_pattern_errors(en_vocab): matcher = Matcher(en_vocab) # normalize "regex" to upper like "text" matcher.add("TEST1", [[{"text": {"regex": "regex"}}]]) - # warn if subpattern attribute isn't recognized and processed - with pytest.warns(UserWarning): + # error if subpattern attribute isn't recognized and processed + with pytest.raises(MatchPatternError): matcher.add("TEST2", [[{"TEXT": {"XX": "xx"}}]]) diff --git a/spacy/tests/regression/test_issue5918.py b/spacy/tests/regression/test_issue5918.py index 3b96009a8..66280f012 100644 --- a/spacy/tests/regression/test_issue5918.py +++ b/spacy/tests/regression/test_issue5918.py @@ -1,18 +1,17 @@ from spacy.lang.en import English -from spacy.pipeline import merge_entities, EntityRuler +from spacy.pipeline import merge_entities def test_issue5918(): # Test edge case when merging entities. nlp = English() + ruler = nlp.add_pipe("entity_ruler") patterns = [ {"label": "ORG", "pattern": "Digicon Inc"}, {"label": "ORG", "pattern": "Rotan Mosle Inc's"}, {"label": "ORG", "pattern": "Rotan Mosle Technology Partners Ltd"}, ] - ruler = EntityRuler(nlp) ruler.add_patterns(patterns) - nlp.add_pipe(ruler) text = """ Digicon Inc said it has completed the previously-announced disposition