Fix tests

This commit is contained in:
Ines Montani 2020-09-04 14:05:55 +02:00
parent 90043a6f9b
commit d7cc2ee72d
4 changed files with 15 additions and 14 deletions

View File

@@ -57,12 +57,13 @@ def validate_token_pattern(obj: list) -> List[str]:
class TokenPatternString(BaseModel):
REGEX: Optional[StrictStr]
IN: Optional[List[StrictStr]]
NOT_IN: Optional[List[StrictStr]]
REGEX: Optional[StrictStr] = Field(None, alias="regex")
IN: Optional[List[StrictStr]] = Field(None, alias="in")
NOT_IN: Optional[List[StrictStr]] = Field(None, alias="not_in")
class Config:
extra = "forbid"
allow_population_by_field_name = True # allow alias and field name
@validator("*", pre=True, each_item=True, allow_reuse=True)
def raise_for_none(cls, v):
@@ -72,9 +73,9 @@ class TokenPatternString(BaseModel):
class TokenPatternNumber(BaseModel):
REGEX: Optional[StrictStr] = None
IN: Optional[List[StrictInt]] = None
NOT_IN: Optional[List[StrictInt]] = None
REGEX: Optional[StrictStr] = Field(None, alias="regex")
IN: Optional[List[StrictInt]] = Field(None, alias="in")
NOT_IN: Optional[List[StrictInt]] = Field(None, alias="not_in")
EQ: Union[StrictInt, StrictFloat] = Field(None, alias="==")
NEQ: Union[StrictInt, StrictFloat] = Field(None, alias="!=")
GEQ: Union[StrictInt, StrictFloat] = Field(None, alias=">=")
@@ -84,6 +85,7 @@ class TokenPatternNumber(BaseModel):
class Config:
extra = "forbid"
allow_population_by_field_name = True # allow alias and field name
@validator("*", pre=True, each_item=True, allow_reuse=True)
def raise_for_none(cls, v):

View File

@@ -46,7 +46,7 @@ def ca_tokenizer():
@pytest.fixture(scope="session")
def cs_tokenizer():
return get_lang_class("cs").Defaults.create_tokenizer()
return get_lang_class("cs")().tokenizer
@pytest.fixture(scope="session")
@@ -211,7 +211,7 @@ def ru_lemmatizer():
@pytest.fixture(scope="session")
def sa_tokenizer():
return get_lang_class("sa").Defaults.create_tokenizer()
return get_lang_class("sa")().tokenizer
@pytest.fixture(scope="session")

View File

@@ -61,10 +61,10 @@ def test_minimal_pattern_validation(en_vocab, pattern, n_errors, n_min_errors):
matcher.add("TEST", [pattern])
def test_pattern_warnings(en_vocab):
def test_pattern_errors(en_vocab):
matcher = Matcher(en_vocab)
# normalize "regex" to upper like "text"
matcher.add("TEST1", [[{"text": {"regex": "regex"}}]])
# warn if subpattern attribute isn't recognized and processed
with pytest.warns(UserWarning):
# error if subpattern attribute isn't recognized and processed
with pytest.raises(MatchPatternError):
matcher.add("TEST2", [[{"TEXT": {"XX": "xx"}}]])

View File

@@ -1,18 +1,17 @@
from spacy.lang.en import English
from spacy.pipeline import merge_entities, EntityRuler
from spacy.pipeline import merge_entities
def test_issue5918():
# Test edge case when merging entities.
nlp = English()
ruler = nlp.add_pipe("entity_ruler")
patterns = [
{"label": "ORG", "pattern": "Digicon Inc"},
{"label": "ORG", "pattern": "Rotan Mosle Inc's"},
{"label": "ORG", "pattern": "Rotan Mosle Technology Partners Ltd"},
]
ruler = EntityRuler(nlp)
ruler.add_patterns(patterns)
nlp.add_pipe(ruler)
text = """
Digicon Inc said it has completed the previously-announced disposition