Merge pull request #5893 from explosion/feature/validate-arg

2025-11-03 09:27:56 +03:00 · 2020-08-07 15:47:20 +02:00 · 2020-08-07 15:47:20 +02:00 · 6f3649923c
commit 6f3649923c
parent e829d3bf14 4aecccf153
7 changed files with 23 additions and 17 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ requires = [
    "cymem>=2.0.2,<2.1.0",
    "preshed>=3.0.2,<3.1.0",
    "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.0.0a22,<8.0.0a30",
+    "thinc>=8.0.0a23,<8.0.0a30",
    "blis>=0.4.0,<0.5.0",
    "pytokenizations",
    "smart_open>=2.0.0,<3.0.0"
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 # Our libraries
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.0.0a22,<8.0.0a30
+thinc>=8.0.0a23,<8.0.0a30
 blis>=0.4.0,<0.5.0
 ml_datasets>=0.1.1
 murmurhash>=0.28.0,<1.1.0
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,13 +34,13 @@ setup_requires =
    cymem>=2.0.2,<2.1.0
    preshed>=3.0.2,<3.1.0
    murmurhash>=0.28.0,<1.1.0
-    thinc>=8.0.0a22,<8.0.0a30
+    thinc>=8.0.0a23,<8.0.0a30
 install_requires =
    # Our libraries
    murmurhash>=0.28.0,<1.1.0
    cymem>=2.0.2,<2.1.0
    preshed>=3.0.2,<3.1.0
-    thinc>=8.0.0a22,<8.0.0a30
+    thinc>=8.0.0a23,<8.0.0a30
    blis>=0.4.0,<0.5.0
    wasabi>=0.7.1,<1.1.0
    srsly>=2.1.0,<3.0.0
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@@ -17,13 +17,18 @@ MatcherPatternType = List[Dict[Union[int, str], Any]]
 AttributeRulerPatternType = Dict[str, Union[MatcherPatternType, Dict, int]]


-@Language.factory("attribute_ruler")
+@Language.factory(
+    "attribute_ruler", default_config={"pattern_dicts": None, "validate": False}
+)
 def make_attribute_ruler(
    nlp: Language,
    name: str,
-    pattern_dicts: Optional[Iterable[AttributeRulerPatternType]] = None,
+    pattern_dicts: Optional[Iterable[AttributeRulerPatternType]],
+    validate: bool,
 ):
-    return AttributeRuler(nlp.vocab, name, pattern_dicts=pattern_dicts)
+    return AttributeRuler(
+        nlp.vocab, name, pattern_dicts=pattern_dicts, validate=validate
+    )


 class AttributeRuler(Pipe):
@@ -39,6 +44,7 @@ class AttributeRuler(Pipe):
        name: str = "attribute_ruler",
        *,
        pattern_dicts: Optional[Iterable[AttributeRulerPatternType]] = None,
+        validate: bool = False,
    ) -> None:
        """Initialize the AttributeRuler.

@@ -54,7 +60,7 @@ class AttributeRuler(Pipe):
        """
        self.name = name
        self.vocab = vocab
-        self.matcher = Matcher(self.vocab)
+        self.matcher = Matcher(self.vocab, validate=validate)
        self.attrs = []
        self._attrs_unnormed = []  # store for reference
        self.indices = []
--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@@ -20,7 +20,7 @@ PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
    assigns=["doc.ents", "token.ent_type", "token.ent_iob"],
    default_config={
        "phrase_matcher_attr": None,
-        "validation": False,
+        "validate": False,
        "overwrite_ents": False,
        "ent_id_sep": DEFAULT_ENT_ID_SEP,
    },
@@ -31,7 +31,7 @@ def make_entity_ruler(
    nlp: Language,
    name: str,
    phrase_matcher_attr: Optional[Union[int, str]],
-    validation: bool,
+    validate: bool,
    overwrite_ents: bool,
    ent_id_sep: str,
 ):
@@ -39,7 +39,7 @@ def make_entity_ruler(
        nlp,
        name,
        phrase_matcher_attr=phrase_matcher_attr,
-        validate=validation,
+        validate=validate,
        overwrite_ents=overwrite_ents,
        ent_id_sep=ent_id_sep,
    )
--- a/website/docs/api/attributeruler.md
+++ b/website/docs/api/attributeruler.md
@@ -25,8 +25,8 @@ how the component should be configured. You can override its settings via the
 >
 > ```python
 > config = {
->    "validation": True,
 >    "pattern_dicts": None,
+>    "validate": True,
 > }
 > nlp.add_pipe("attribute_ruler", config=config)
 > ```
@@ -34,7 +34,7 @@ how the component should be configured. You can override its settings via the
 | Setting         | Type             | Description                                                                                                                             | Default |
 | --------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------- | ------- |
 | `pattern_dicts` | `Iterable[dict]` | A list of pattern dicts with the keys as the arguments to [`AttributeRuler.add`](#add) (`patterns`/`attrs`/`index`) to add as patterns. | `None`  |
-| `validation`    | bool             | Whether patterns should be validated, passed to `Matcher` as `validate`.                                                                | `False` |
+| `validate`      | bool             | Whether patterns should be validated (passed to the `Matcher`).                                                                         | `False` |

 ```python
 https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/attributeruler.py
@@ -65,8 +65,8 @@ pattern_dicts = \[
 | `vocab`         | `Vocab`           | The shared nlp object to pass the vocab to the matchers and process phrase patterns.                                                                                                                                          |
 | `name`          | str               | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. Used to disable the current entity ruler while creating phrase patterns with the nlp object. |
 | _keyword-only_  |                   |                                                                                                                                                                                                                               |
-| `pattern_dicts` | `Iterable[Dict]]` | Optional patterns to load in on initialization.                                                                                                                                                                               |
-| `validate`      | bool              | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`. Defaults to `False`.                                                                                                                 |
+| `pattern_dicts` | `Iterable[Dict]`  | Optional patterns to load in on initialization. Defaults to `None`.                                                                                                                                                           |
+| `validate`      | bool              | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`.                                                                                                                                          |

 ## AttributeRuler.\_\_call\_\_ {#call tag="method"}

--- a/website/docs/api/entityruler.md
+++ b/website/docs/api/entityruler.md
@@ -27,7 +27,7 @@ how the component should be configured. You can override its settings via the
 > ```python
 > config = {
 >    "phrase_matcher_attr": None,
->    "validation": True,
+>    "validate": True,
 >    "overwrite_ents": False,
 >    "ent_id_sep": "||",
 > }
@@ -37,7 +37,7 @@ how the component should be configured. You can override its settings via the
 | Setting               | Type | Description                                                                                                                                 | Default |
 | --------------------- | ---- | ------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
 | `phrase_matcher_attr` | str  | Optional attribute name match on for the internal [`PhraseMatcher`](/api/phrasematcher), e.g. `LOWER` to match on the lowercase token text. | `None`  |
-| `validation`          | bool | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`.                                                    | `False` |
+| `validate`            | bool | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`).                                                         | `False` |
 | `overwrite_ents`      | bool | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary.                                 | `False` |
 | `ent_id_sep`          | str  | Separator used internally for entity IDs.                                                                                                   | `"||"`  |