mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-19 14:04:12 +03:00
e2b70df012
* Use isort with Black profile * isort all the things * Fix import cycles as a result of import sorting * Add DOCBIN_ALL_ATTRS type definition * Add isort to requirements * Remove isort from build dependencies check * Typo
46 lines
1.4 KiB
Python
46 lines
1.4 KiB
Python
from collections import defaultdict
|
|
from typing import Any, Dict, List, Union
|
|
|
|
from pydantic import BaseModel, Field, ValidationError
|
|
from pydantic.types import StrictBool, StrictInt, StrictStr
|
|
|
|
|
|
class MatchNodeSchema(BaseModel):
|
|
prefix_len: StrictInt = Field(..., title="Prefix length")
|
|
suffix_len: StrictInt = Field(..., title="Suffix length")
|
|
prefix_tree: StrictInt = Field(..., title="Prefix tree")
|
|
suffix_tree: StrictInt = Field(..., title="Suffix tree")
|
|
|
|
class Config:
|
|
extra = "forbid"
|
|
|
|
|
|
class SubstNodeSchema(BaseModel):
|
|
orig: Union[int, StrictStr] = Field(..., title="Original substring")
|
|
subst: Union[int, StrictStr] = Field(..., title="Replacement substring")
|
|
|
|
class Config:
|
|
extra = "forbid"
|
|
|
|
|
|
class EditTreeSchema(BaseModel):
|
|
__root__: Union[MatchNodeSchema, SubstNodeSchema]
|
|
|
|
|
|
def validate_edit_tree(obj: Dict[str, Any]) -> List[str]:
|
|
"""Validate edit tree.
|
|
|
|
obj (Dict[str, Any]): JSON-serializable data to validate.
|
|
RETURNS (List[str]): A list of error messages, if available.
|
|
"""
|
|
try:
|
|
EditTreeSchema.parse_obj(obj)
|
|
return []
|
|
except ValidationError as e:
|
|
errors = e.errors()
|
|
data = defaultdict(list)
|
|
for error in errors:
|
|
err_loc = " -> ".join([str(p) for p in error.get("loc", [])])
|
|
data[err_loc].append(error.get("msg"))
|
|
return [f"[{loc}] {', '.join(msg)}" for loc, msg in data.items()] # type: ignore[arg-type]
|