Remove some old version refs in the docs (#9448)

* Remove some old version refs in the docs
* Remove warning
* Update spacy/matcher/matcher.pyx
* Remove all references to the punctuation warning

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
2025-10-30 23:47:31 +03:00 · 2021-10-21 09:17:59 +00:00 · 2021-10-21 09:17:59 +00:00 · 28ecf399da
commit 28ecf399da
parent 7b98aa4c16
4 changed files with 6 additions and 33 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -419,7 +419,7 @@ simply click on the "Suggest edits" button at the bottom of a page.
 ## Publishing spaCy extensions and plugins

 We're very excited about all the new possibilities for **community extensions**
-and plugins in spaCy v2.0, and we can't wait to see what you build with it!
+and plugins in spaCy v3.0, and we can't wait to see what you build with it!

 - An extension or plugin should add substantial functionality, be
  **well-documented** and **open-source**. It should be available for users to download
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -203,7 +203,6 @@ def debug_data(
        has_low_data_warning = False
        has_no_neg_warning = False
        has_ws_ents_error = False
-        has_punct_ents_warning = False

        msg.divider("Named Entity Recognition")
        msg.info(f"{len(model_labels)} label(s)")
@@ -230,10 +229,6 @@ def debug_data(
            msg.fail(f"{gold_train_data['ws_ents']} invalid whitespace entity spans")
            has_ws_ents_error = True

-        if gold_train_data["punct_ents"]:
-            msg.warn(f"{gold_train_data['punct_ents']} entity span(s) with punctuation")
-            has_punct_ents_warning = True
-
        for label in labels:
            if label_counts[label] <= NEW_LABEL_THRESHOLD:
                msg.warn(
@@ -253,8 +248,6 @@ def debug_data(
            msg.good("Examples without occurrences available for all labels")
        if not has_ws_ents_error:
            msg.good("No entities consisting of or starting/ending with whitespace")
-        if not has_punct_ents_warning:
-            msg.good("No entities consisting of or starting/ending with punctuation")

        if has_low_data_warning:
            msg.text(
@@ -270,15 +263,9 @@ def debug_data(
                show=verbose,
            )
        if has_ws_ents_error:
-            msg.text(
-                "As of spaCy v2.1.0, entity spans consisting of or starting/ending "
-                "with whitespace characters are considered invalid."
-            )
-
-        if has_punct_ents_warning:
            msg.text(
                "Entity spans consisting of or starting/ending "
-                "with punctuation can not be trained with a noise level > 0."
+                "with whitespace characters are considered invalid."
            )

    if "textcat" in factory_names:
@@ -578,7 +565,6 @@ def _compile_gold(
        "words": Counter(),
        "roots": Counter(),
        "ws_ents": 0,
-        "punct_ents": 0,
        "n_words": 0,
        "n_misaligned_words": 0,
        "words_missing_vectors": Counter(),
@@ -613,16 +599,6 @@ def _compile_gold(
                if label.startswith(("B-", "U-", "L-")) and doc[i].is_space:
                    # "Illegal" whitespace entity
                    data["ws_ents"] += 1
-                if label.startswith(("B-", "U-", "L-")) and doc[i].text in [
-                    ".",
-                    "'",
-                    "!",
-                    "?",
-                    ",",
-                ]:
-                    # punctuation entity: could be replaced by whitespace when training with noise,
-                    # so add a warning to alert the user to this unexpected side effect.
-                    data["punct_ents"] += 1
                if label.startswith(("B-", "U-")):
                    combined_label = label.split("-")[1]
                    data["ner"][combined_label] += 1
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@@ -96,10 +96,8 @@ cdef class Matcher:
        by returning a non-overlapping set per key, either taking preference to
        the first greedy match ("FIRST"), or the longest ("LONGEST").

-        As of spaCy v2.2.2, Matcher.add supports the future API, which makes
-        the patterns the second argument and a list (instead of a variable
-        number of arguments). The on_match callback becomes an optional keyword
-        argument.
+        Since spaCy v2.2.2, Matcher.add takes a list of patterns as the second
+        argument, and the on_match callback is an optional keyword argument.

        key (Union[str, int]): The match ID.
        patterns (list): The patterns to add for the given key.
--- a/spacy/matcher/phrasematcher.pyx
+++ b/spacy/matcher/phrasematcher.pyx
@@ -157,9 +157,8 @@ cdef class PhraseMatcher:
        """Add a match-rule to the phrase-matcher. A match-rule consists of: an ID
        key, an on_match callback, and one or more patterns.

-        As of spaCy v2.2.2, PhraseMatcher.add supports the future API, which
-        makes the patterns the second argument and a list (instead of a variable
-        number of arguments). The on_match callback becomes an optional keyword
+        Since spaCy v2.2.2, PhraseMatcher.add takes a list of patterns as the
+        second argument, with the on_match callback as an optional keyword
        argument.

        key (str): The match ID.