Fix tests
This commit is contained in:
parent 90043a6f9b
commit d7cc2ee72d
@@ -57,12 +57,13 @@ def validate_token_pattern(obj: list) -> List[str]:
 
 
 class TokenPatternString(BaseModel):
-    REGEX: Optional[StrictStr]
-    IN: Optional[List[StrictStr]]
-    NOT_IN: Optional[List[StrictStr]]
+    REGEX: Optional[StrictStr] = Field(None, alias="regex")
+    IN: Optional[List[StrictStr]] = Field(None, alias="in")
+    NOT_IN: Optional[List[StrictStr]] = Field(None, alias="not_in")
 
     class Config:
         extra = "forbid"
+        allow_population_by_field_name = True  # allow alias and field name
 
     @validator("*", pre=True, each_item=True, allow_reuse=True)
     def raise_for_none(cls, v):
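Note: the aliases let the schema accept lowercase keys ("regex", "in", "not_in") in addition to the canonical uppercase attribute names. A minimal sketch of the resulting behavior, assuming pydantic v1 (the version these schemas target):

from typing import List, Optional
from pydantic import BaseModel, Field, StrictStr

class TokenPatternString(BaseModel):
    REGEX: Optional[StrictStr] = Field(None, alias="regex")
    IN: Optional[List[StrictStr]] = Field(None, alias="in")

    class Config:
        extra = "forbid"
        allow_population_by_field_name = True  # allow alias and field name

# Both the alias and the field name populate the same field:
assert TokenPatternString(**{"regex": "^x$"}).REGEX == "^x$"
assert TokenPatternString(**{"REGEX": "^x$"}).REGEX == "^x$"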
@@ -72,9 +73,9 @@ class TokenPatternString(BaseModel):
 
 
 class TokenPatternNumber(BaseModel):
-    REGEX: Optional[StrictStr] = None
-    IN: Optional[List[StrictInt]] = None
-    NOT_IN: Optional[List[StrictInt]] = None
+    REGEX: Optional[StrictStr] = Field(None, alias="regex")
+    IN: Optional[List[StrictInt]] = Field(None, alias="in")
+    NOT_IN: Optional[List[StrictInt]] = Field(None, alias="not_in")
     EQ: Union[StrictInt, StrictFloat] = Field(None, alias="==")
     NEQ: Union[StrictInt, StrictFloat] = Field(None, alias="!=")
     GEQ: Union[StrictInt, StrictFloat] = Field(None, alias=">=")
@@ -84,6 +85,7 @@ class TokenPatternNumber(BaseModel):
 
     class Config:
         extra = "forbid"
+        allow_population_by_field_name = True  # allow alias and field name
 
     @validator("*", pre=True, each_item=True, allow_reuse=True)
     def raise_for_none(cls, v):
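Note: for fields like EQ the alias is essential, since "==" is not a valid Python identifier and can only arrive as a dict key; allow_population_by_field_name additionally lets callers use the field name directly. A hedged sketch of both spellings under pydantic v1:

from typing import Union
from pydantic import BaseModel, Field, StrictFloat, StrictInt

class TokenPatternNumber(BaseModel):
    EQ: Union[StrictInt, StrictFloat] = Field(None, alias="==")

    class Config:
        extra = "forbid"
        allow_population_by_field_name = True  # allow alias and field name

assert TokenPatternNumber(**{"==": 5}).EQ == 5  # via the "==" alias
assert TokenPatternNumber(EQ=5).EQ == 5         # via the field name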
@@ -46,7 +46,7 @@ def ca_tokenizer():
 
 @pytest.fixture(scope="session")
 def cs_tokenizer():
-    return get_lang_class("cs").Defaults.create_tokenizer()
+    return get_lang_class("cs")().tokenizer
 
 
 @pytest.fixture(scope="session")
@@ -211,7 +211,7 @@ def ru_lemmatizer():
 
 @pytest.fixture(scope="session")
 def sa_tokenizer():
-    return get_lang_class("sa").Defaults.create_tokenizer()
+    return get_lang_class("sa")().tokenizer
 
 
 @pytest.fixture(scope="session")
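Note: the fixtures replace the `get_lang_class(...).Defaults.create_tokenizer()` call with instantiating the Language subclass and reading its tokenizer attribute. A small usage sketch (the "cs" language code is taken from the diff above):

from spacy.util import get_lang_class

nlp = get_lang_class("cs")()           # instantiate a blank Czech pipeline
doc = nlp.tokenizer("Tohle je věta.")  # tokenizer is an attribute of nlp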
@@ -61,10 +61,10 @@ def test_minimal_pattern_validation(en_vocab, pattern, n_errors, n_min_errors):
     matcher.add("TEST", [pattern])
 
 
-def test_pattern_warnings(en_vocab):
+def test_pattern_errors(en_vocab):
     matcher = Matcher(en_vocab)
     # normalize "regex" to upper like "text"
     matcher.add("TEST1", [[{"text": {"regex": "regex"}}]])
-    # warn if subpattern attribute isn't recognized and processed
-    with pytest.warns(UserWarning):
+    # error if subpattern attribute isn't recognized and processed
+    with pytest.raises(MatchPatternError):
         matcher.add("TEST2", [[{"TEXT": {"XX": "xx"}}]])
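Note: an unrecognized subpattern key is now a hard validation error rather than a warning. A minimal reproduction sketch; the import path is assumed to be spacy.errors:

from spacy.errors import MatchPatternError
from spacy.lang.en import English
from spacy.matcher import Matcher

matcher = Matcher(English().vocab)
try:
    matcher.add("BAD", [[{"TEXT": {"XX": "xx"}}]])  # "XX" is not a valid key
except MatchPatternError as err:
    print(err)  # reports the schema errors for pattern "BAD"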
@@ -1,18 +1,17 @@
 from spacy.lang.en import English
-from spacy.pipeline import merge_entities, EntityRuler
+from spacy.pipeline import merge_entities
 
 
 def test_issue5918():
     # Test edge case when merging entities.
     nlp = English()
+    ruler = nlp.add_pipe("entity_ruler")
     patterns = [
         {"label": "ORG", "pattern": "Digicon Inc"},
         {"label": "ORG", "pattern": "Rotan Mosle Inc's"},
         {"label": "ORG", "pattern": "Rotan Mosle Technology Partners Ltd"},
     ]
-    ruler = EntityRuler(nlp)
     ruler.add_patterns(patterns)
-    nlp.add_pipe(ruler)
 
     text = """
 Digicon Inc said it has completed the previously-announced disposition
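Note: in spaCy v3, nlp.add_pipe takes the registered component name and returns the component instance, so the ruler no longer needs to be constructed and added separately. A short usage sketch:

from spacy.lang.en import English

nlp = English()
ruler = nlp.add_pipe("entity_ruler")  # returns the EntityRuler instance
ruler.add_patterns([{"label": "ORG", "pattern": "Digicon Inc"}])
doc = nlp("Digicon Inc said it has completed the disposition.")
assert [(e.text, e.label_) for e in doc.ents] == [("Digicon Inc", "ORG")]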