Fix tests

This commit is contained in:
Ines Montani 2020-09-04 14:05:55 +02:00
parent 90043a6f9b
commit d7cc2ee72d
4 changed files with 15 additions and 14 deletions

View File

@@ -57,12 +57,13 @@ def validate_token_pattern(obj: list) -> List[str]:
class TokenPatternString(BaseModel):
REGEX: Optional[StrictStr]
IN: Optional[List[StrictStr]]
NOT_IN: Optional[List[StrictStr]]
REGEX: Optional[StrictStr] = Field(None, alias="regex")
IN: Optional[List[StrictStr]] = Field(None, alias="in")
NOT_IN: Optional[List[StrictStr]] = Field(None, alias="not_in")
class Config:
extra = "forbid"
allow_population_by_field_name = True # allow alias and field name
@validator("*", pre=True, each_item=True, allow_reuse=True)
def raise_for_none(cls, v):
@@ -72,9 +73,9 @@ class TokenPatternString(BaseModel):
class TokenPatternNumber(BaseModel):
REGEX: Optional[StrictStr] = None
IN: Optional[List[StrictInt]] = None
NOT_IN: Optional[List[StrictInt]] = None
REGEX: Optional[StrictStr] = Field(None, alias="regex")
IN: Optional[List[StrictInt]] = Field(None, alias="in")
NOT_IN: Optional[List[StrictInt]] = Field(None, alias="not_in")
EQ: Union[StrictInt, StrictFloat] = Field(None, alias="==")
NEQ: Union[StrictInt, StrictFloat] = Field(None, alias="!=")
GEQ: Union[StrictInt, StrictFloat] = Field(None, alias=">=")
@@ -84,6 +85,7 @@ class TokenPatternNumber(BaseModel):
class Config:
extra = "forbid"
allow_population_by_field_name = True # allow alias and field name
@validator("*", pre=True, each_item=True, allow_reuse=True)
def raise_for_none(cls, v):

View File

@@ -46,7 +46,7 @@ def ca_tokenizer():
@pytest.fixture(scope="session")
def cs_tokenizer():
return get_lang_class("cs").Defaults.create_tokenizer()
return get_lang_class("cs")().tokenizer
@pytest.fixture(scope="session")
@@ -211,7 +211,7 @@ def ru_lemmatizer():
@pytest.fixture(scope="session")
def sa_tokenizer():
return get_lang_class("sa").Defaults.create_tokenizer()
return get_lang_class("sa")().tokenizer
@pytest.fixture(scope="session")

View File

@@ -61,10 +61,10 @@ def test_minimal_pattern_validation(en_vocab, pattern, n_errors, n_min_errors):
matcher.add("TEST", [pattern])
def test_pattern_warnings(en_vocab):
def test_pattern_errors(en_vocab):
matcher = Matcher(en_vocab)
# normalize "regex" to upper like "text"
matcher.add("TEST1", [[{"text": {"regex": "regex"}}]])
# warn if subpattern attribute isn't recognized and processed
with pytest.warns(UserWarning):
# error if subpattern attribute isn't recognized and processed
with pytest.raises(MatchPatternError):
matcher.add("TEST2", [[{"TEXT": {"XX": "xx"}}]])

View File

@@ -1,18 +1,17 @@
from spacy.lang.en import English
from spacy.pipeline import merge_entities, EntityRuler
from spacy.pipeline import merge_entities
def test_issue5918():
# Test edge case when merging entities.
nlp = English()
ruler = nlp.add_pipe("entity_ruler")
patterns = [
{"label": "ORG", "pattern": "Digicon Inc"},
{"label": "ORG", "pattern": "Rotan Mosle Inc's"},
{"label": "ORG", "pattern": "Rotan Mosle Technology Partners Ltd"},
]
ruler = EntityRuler(nlp)
ruler.add_patterns(patterns)
nlp.add_pipe(ruler)
text = """
Digicon Inc said it has completed the previously-announced disposition