mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Merge pull request #5893 from explosion/feature/validate-arg
This commit is contained in:
commit
6f3649923c
|
@ -6,7 +6,7 @@ requires = [
|
|||
"cymem>=2.0.2,<2.1.0",
|
||||
"preshed>=3.0.2,<3.1.0",
|
||||
"murmurhash>=0.28.0,<1.1.0",
|
||||
"thinc>=8.0.0a22,<8.0.0a30",
|
||||
"thinc>=8.0.0a23,<8.0.0a30",
|
||||
"blis>=0.4.0,<0.5.0",
|
||||
"pytokenizations",
|
||||
"smart_open>=2.0.0,<3.0.0"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# Our libraries
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.0a22,<8.0.0a30
|
||||
thinc>=8.0.0a23,<8.0.0a30
|
||||
blis>=0.4.0,<0.5.0
|
||||
ml_datasets>=0.1.1
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
|
|
|
@ -34,13 +34,13 @@ setup_requires =
|
|||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
thinc>=8.0.0a22,<8.0.0a30
|
||||
thinc>=8.0.0a23,<8.0.0a30
|
||||
install_requires =
|
||||
# Our libraries
|
||||
murmurhash>=0.28.0,<1.1.0
|
||||
cymem>=2.0.2,<2.1.0
|
||||
preshed>=3.0.2,<3.1.0
|
||||
thinc>=8.0.0a22,<8.0.0a30
|
||||
thinc>=8.0.0a23,<8.0.0a30
|
||||
blis>=0.4.0,<0.5.0
|
||||
wasabi>=0.7.1,<1.1.0
|
||||
srsly>=2.1.0,<3.0.0
|
||||
|
|
|
@ -17,13 +17,18 @@ MatcherPatternType = List[Dict[Union[int, str], Any]]
|
|||
AttributeRulerPatternType = Dict[str, Union[MatcherPatternType, Dict, int]]
|
||||
|
||||
|
||||
@Language.factory("attribute_ruler")
|
||||
@Language.factory(
|
||||
"attribute_ruler", default_config={"pattern_dicts": None, "validate": False}
|
||||
)
|
||||
def make_attribute_ruler(
|
||||
nlp: Language,
|
||||
name: str,
|
||||
pattern_dicts: Optional[Iterable[AttributeRulerPatternType]] = None,
|
||||
pattern_dicts: Optional[Iterable[AttributeRulerPatternType]],
|
||||
validate: bool,
|
||||
):
|
||||
return AttributeRuler(nlp.vocab, name, pattern_dicts=pattern_dicts)
|
||||
return AttributeRuler(
|
||||
nlp.vocab, name, pattern_dicts=pattern_dicts, validate=validate
|
||||
)
|
||||
|
||||
|
||||
class AttributeRuler(Pipe):
|
||||
|
@ -39,6 +44,7 @@ class AttributeRuler(Pipe):
|
|||
name: str = "attribute_ruler",
|
||||
*,
|
||||
pattern_dicts: Optional[Iterable[AttributeRulerPatternType]] = None,
|
||||
validate: bool = False,
|
||||
) -> None:
|
||||
"""Initialize the AttributeRuler.
|
||||
|
||||
|
@ -54,7 +60,7 @@ class AttributeRuler(Pipe):
|
|||
"""
|
||||
self.name = name
|
||||
self.vocab = vocab
|
||||
self.matcher = Matcher(self.vocab)
|
||||
self.matcher = Matcher(self.vocab, validate=validate)
|
||||
self.attrs = []
|
||||
self._attrs_unnormed = [] # store for reference
|
||||
self.indices = []
|
||||
|
|
|
@ -20,7 +20,7 @@ PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
|
|||
assigns=["doc.ents", "token.ent_type", "token.ent_iob"],
|
||||
default_config={
|
||||
"phrase_matcher_attr": None,
|
||||
"validation": False,
|
||||
"validate": False,
|
||||
"overwrite_ents": False,
|
||||
"ent_id_sep": DEFAULT_ENT_ID_SEP,
|
||||
},
|
||||
|
@ -31,7 +31,7 @@ def make_entity_ruler(
|
|||
nlp: Language,
|
||||
name: str,
|
||||
phrase_matcher_attr: Optional[Union[int, str]],
|
||||
validation: bool,
|
||||
validate: bool,
|
||||
overwrite_ents: bool,
|
||||
ent_id_sep: str,
|
||||
):
|
||||
|
@ -39,7 +39,7 @@ def make_entity_ruler(
|
|||
nlp,
|
||||
name,
|
||||
phrase_matcher_attr=phrase_matcher_attr,
|
||||
validate=validation,
|
||||
validate=validate,
|
||||
overwrite_ents=overwrite_ents,
|
||||
ent_id_sep=ent_id_sep,
|
||||
)
|
||||
|
|
|
@ -25,8 +25,8 @@ how the component should be configured. You can override its settings via the
|
|||
>
|
||||
> ```python
|
||||
> config = {
|
||||
> "validation": True,
|
||||
> "pattern_dicts": None,
|
||||
> "validate": True,
|
||||
> }
|
||||
> nlp.add_pipe("attribute_ruler", config=config)
|
||||
> ```
|
||||
|
@ -34,7 +34,7 @@ how the component should be configured. You can override its settings via the
|
|||
| Setting | Type | Description | Default |
|
||||
| --------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------- | ------- |
|
||||
| `pattern_dicts` | `Iterable[dict]` | A list of pattern dicts with the keys as the arguments to [`AttributeRuler.add`](#add) (`patterns`/`attrs`/`index`) to add as patterns. | `None` |
|
||||
| `validation` | bool | Whether patterns should be validated, passed to `Matcher` as `validate`. | `False` |
|
||||
| `validate` | bool | Whether patterns should be validated (passed to the `Matcher`). | `False` |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/attributeruler.py
|
||||
|
@ -65,8 +65,8 @@ pattern_dicts = \[
|
|||
| `vocab` | `Vocab` | The shared nlp object to pass the vocab to the matchers and process phrase patterns. |
|
||||
| `name` | str | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. Used to disable the current entity ruler while creating phrase patterns with the nlp object. |
|
||||
| _keyword-only_ | | |
|
||||
| `pattern_dicts` | `Iterable[Dict]]` | Optional patterns to load in on initialization. |
|
||||
| `validate` | bool | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`. Defaults to `False`. |
|
||||
| `pattern_dicts` | `Iterable[Dict]]` | Optional patterns to load in on initialization. Defaults to `None`. |
|
||||
| `validate` | bool | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`. |
|
||||
|
||||
## AttributeRuler.\_\_call\_\_ {#call tag="method"}
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ how the component should be configured. You can override its settings via the
|
|||
> ```python
|
||||
> config = {
|
||||
> "phrase_matcher_attr": None,
|
||||
> "validation": True,
|
||||
> "validate": True,
|
||||
> "overwrite_ents": False,
|
||||
> "ent_id_sep": "||",
|
||||
> }
|
||||
|
@ -37,7 +37,7 @@ how the component should be configured. You can override its settings via the
|
|||
| Setting | Type | Description | Default |
|
||||
| --------------------- | ---- | ------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
|
||||
| `phrase_matcher_attr` | str | Optional attribute name match on for the internal [`PhraseMatcher`](/api/phrasematcher), e.g. `LOWER` to match on the lowercase token text. | `None` |
|
||||
| `validation` | bool | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`. | `False` |
|
||||
| `validate` | bool | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`). | `False` |
|
||||
| `overwrite_ents` | bool | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. | `False` |
|
||||
| `ent_id_sep` | str | Separator used internally for entity IDs. | `"||"` |
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user