Fix tests

Ines Montani 2020-09-04 14:05:55 +02:00
parent 90043a6f9b
commit d7cc2ee72d
4 changed files with 15 additions and 14 deletions

View File

@@ -57,12 +57,13 @@ def validate_token_pattern(obj: list) -> List[str]:

 class TokenPatternString(BaseModel):
-    REGEX: Optional[StrictStr]
-    IN: Optional[List[StrictStr]]
-    NOT_IN: Optional[List[StrictStr]]
+    REGEX: Optional[StrictStr] = Field(None, alias="regex")
+    IN: Optional[List[StrictStr]] = Field(None, alias="in")
+    NOT_IN: Optional[List[StrictStr]] = Field(None, alias="not_in")

     class Config:
         extra = "forbid"
+        allow_population_by_field_name = True  # allow alias and field name

     @validator("*", pre=True, each_item=True, allow_reuse=True)
     def raise_for_none(cls, v):
@@ -72,9 +73,9 @@ class TokenPatternString(BaseModel):

 class TokenPatternNumber(BaseModel):
-    REGEX: Optional[StrictStr] = None
-    IN: Optional[List[StrictInt]] = None
-    NOT_IN: Optional[List[StrictInt]] = None
+    REGEX: Optional[StrictStr] = Field(None, alias="regex")
+    IN: Optional[List[StrictInt]] = Field(None, alias="in")
+    NOT_IN: Optional[List[StrictInt]] = Field(None, alias="not_in")
     EQ: Union[StrictInt, StrictFloat] = Field(None, alias="==")
     NEQ: Union[StrictInt, StrictFloat] = Field(None, alias="!=")
     GEQ: Union[StrictInt, StrictFloat] = Field(None, alias=">=")
@@ -84,6 +85,7 @@ class TokenPatternNumber(BaseModel):

     class Config:
         extra = "forbid"
+        allow_population_by_field_name = True  # allow alias and field name

     @validator("*", pre=True, each_item=True, allow_reuse=True)
     def raise_for_none(cls, v):
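Note on what this schema change does: each pattern field now carries a lowercase alias, and allow_population_by_field_name lets pydantic accept either spelling. A minimal standalone sketch of the expected behaviour (trimmed to two fields, not the full spaCy schema, and assuming pydantic v1):

from typing import List, Optional
from pydantic import BaseModel, Field, StrictStr

class TokenPatternString(BaseModel):
    REGEX: Optional[StrictStr] = Field(None, alias="regex")
    IN: Optional[List[StrictStr]] = Field(None, alias="in")

    class Config:
        extra = "forbid"
        allow_population_by_field_name = True  # allow alias and field name

# Both the lowercase alias and the uppercase field name should validate:
print(TokenPatternString(**{"regex": "^foo"}).REGEX)  # ^foo
print(TokenPatternString(**{"REGEX": "^foo"}).REGEX)  # ^foo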

View File

@@ -46,7 +46,7 @@ def ca_tokenizer():

 @pytest.fixture(scope="session")
 def cs_tokenizer():
-    return get_lang_class("cs").Defaults.create_tokenizer()
+    return get_lang_class("cs")().tokenizer


 @pytest.fixture(scope="session")
@@ -211,7 +211,7 @@ def ru_lemmatizer():

 @pytest.fixture(scope="session")
 def sa_tokenizer():
-    return get_lang_class("sa").Defaults.create_tokenizer()
+    return get_lang_class("sa")().tokenizer


 @pytest.fixture(scope="session")

View File

@@ -61,10 +61,10 @@ def test_minimal_pattern_validation(en_vocab, pattern, n_errors, n_min_errors):
         matcher.add("TEST", [pattern])


-def test_pattern_warnings(en_vocab):
+def test_pattern_errors(en_vocab):
     matcher = Matcher(en_vocab)
     # normalize "regex" to upper like "text"
     matcher.add("TEST1", [[{"text": {"regex": "regex"}}]])
-    # warn if subpattern attribute isn't recognized and processed
-    with pytest.warns(UserWarning):
+    # error if subpattern attribute isn't recognized and processed
+    with pytest.raises(MatchPatternError):
         matcher.add("TEST2", [[{"TEXT": {"XX": "xx"}}]])

View File

@@ -1,18 +1,17 @@
 from spacy.lang.en import English
-from spacy.pipeline import merge_entities, EntityRuler
+from spacy.pipeline import merge_entities


 def test_issue5918():
     # Test edge case when merging entities.
     nlp = English()
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [
         {"label": "ORG", "pattern": "Digicon Inc"},
         {"label": "ORG", "pattern": "Rotan Mosle Inc's"},
         {"label": "ORG", "pattern": "Rotan Mosle Technology Partners Ltd"},
     ]
-    ruler = EntityRuler(nlp)
     ruler.add_patterns(patterns)
-    nlp.add_pipe(ruler)
     text = """
     Digicon Inc said it has completed the previously-announced disposition
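The last file reflects the v3 pipeline API: nlp.add_pipe takes the registered component name and returns the component instance, replacing the old pattern of constructing an EntityRuler and passing the object to add_pipe. A brief sketch of the new usage, with a shortened example text:

from spacy.lang.en import English

nlp = English()
ruler = nlp.add_pipe("entity_ruler")  # returns the EntityRuler instance
ruler.add_patterns([{"label": "ORG", "pattern": "Digicon Inc"}])
doc = nlp("Digicon Inc said it has completed the disposition")
print([(ent.text, ent.label_) for ent in doc.ents])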