Remove some old version refs in the docs (#9448)

* Remove some old version refs in the docs

* Remove warning

* Update spacy/matcher/matcher.pyx

* Remove all references to the punctuation warning

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
Paul O'Leary McCann 2021-10-21 09:17:59 +00:00 committed by GitHub
parent 7b98aa4c16
commit 28ecf399da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 6 additions and 33 deletions

View File

@ -419,7 +419,7 @@ simply click on the "Suggest edits" button at the bottom of a page.
## Publishing spaCy extensions and plugins ## Publishing spaCy extensions and plugins
We're very excited about all the new possibilities for **community extensions** We're very excited about all the new possibilities for **community extensions**
and plugins in spaCy v2.0, and we can't wait to see what you build with it! and plugins in spaCy v3.0, and we can't wait to see what you build with it!
- An extension or plugin should add substantial functionality, be - An extension or plugin should add substantial functionality, be
**well-documented** and **open-source**. It should be available for users to download **well-documented** and **open-source**. It should be available for users to download

View File

@ -203,7 +203,6 @@ def debug_data(
has_low_data_warning = False has_low_data_warning = False
has_no_neg_warning = False has_no_neg_warning = False
has_ws_ents_error = False has_ws_ents_error = False
has_punct_ents_warning = False
msg.divider("Named Entity Recognition") msg.divider("Named Entity Recognition")
msg.info(f"{len(model_labels)} label(s)") msg.info(f"{len(model_labels)} label(s)")
@ -230,10 +229,6 @@ def debug_data(
msg.fail(f"{gold_train_data['ws_ents']} invalid whitespace entity spans") msg.fail(f"{gold_train_data['ws_ents']} invalid whitespace entity spans")
has_ws_ents_error = True has_ws_ents_error = True
if gold_train_data["punct_ents"]:
msg.warn(f"{gold_train_data['punct_ents']} entity span(s) with punctuation")
has_punct_ents_warning = True
for label in labels: for label in labels:
if label_counts[label] <= NEW_LABEL_THRESHOLD: if label_counts[label] <= NEW_LABEL_THRESHOLD:
msg.warn( msg.warn(
@ -253,8 +248,6 @@ def debug_data(
msg.good("Examples without occurrences available for all labels") msg.good("Examples without occurrences available for all labels")
if not has_ws_ents_error: if not has_ws_ents_error:
msg.good("No entities consisting of or starting/ending with whitespace") msg.good("No entities consisting of or starting/ending with whitespace")
if not has_punct_ents_warning:
msg.good("No entities consisting of or starting/ending with punctuation")
if has_low_data_warning: if has_low_data_warning:
msg.text( msg.text(
@ -270,15 +263,9 @@ def debug_data(
show=verbose, show=verbose,
) )
if has_ws_ents_error: if has_ws_ents_error:
msg.text(
"As of spaCy v2.1.0, entity spans consisting of or starting/ending "
"with whitespace characters are considered invalid."
)
if has_punct_ents_warning:
msg.text( msg.text(
"Entity spans consisting of or starting/ending " "Entity spans consisting of or starting/ending "
"with punctuation can not be trained with a noise level > 0." "with whitespace characters are considered invalid."
) )
if "textcat" in factory_names: if "textcat" in factory_names:
@ -578,7 +565,6 @@ def _compile_gold(
"words": Counter(), "words": Counter(),
"roots": Counter(), "roots": Counter(),
"ws_ents": 0, "ws_ents": 0,
"punct_ents": 0,
"n_words": 0, "n_words": 0,
"n_misaligned_words": 0, "n_misaligned_words": 0,
"words_missing_vectors": Counter(), "words_missing_vectors": Counter(),
@ -613,16 +599,6 @@ def _compile_gold(
if label.startswith(("B-", "U-", "L-")) and doc[i].is_space: if label.startswith(("B-", "U-", "L-")) and doc[i].is_space:
# "Illegal" whitespace entity # "Illegal" whitespace entity
data["ws_ents"] += 1 data["ws_ents"] += 1
if label.startswith(("B-", "U-", "L-")) and doc[i].text in [
".",
"'",
"!",
"?",
",",
]:
# punctuation entity: could be replaced by whitespace when training with noise,
# so add a warning to alert the user to this unexpected side effect.
data["punct_ents"] += 1
if label.startswith(("B-", "U-")): if label.startswith(("B-", "U-")):
combined_label = label.split("-")[1] combined_label = label.split("-")[1]
data["ner"][combined_label] += 1 data["ner"][combined_label] += 1

View File

@ -96,10 +96,8 @@ cdef class Matcher:
by returning a non-overlapping set per key, either taking preference to by returning a non-overlapping set per key, either taking preference to
the first greedy match ("FIRST"), or the longest ("LONGEST"). the first greedy match ("FIRST"), or the longest ("LONGEST").
As of spaCy v2.2.2, Matcher.add supports the future API, which makes Since spaCy v2.2.2, Matcher.add takes a list of patterns as the second
the patterns the second argument and a list (instead of a variable argument, and the on_match callback is an optional keyword argument.
number of arguments). The on_match callback becomes an optional keyword
argument.
key (Union[str, int]): The match ID. key (Union[str, int]): The match ID.
patterns (list): The patterns to add for the given key. patterns (list): The patterns to add for the given key.

View File

@ -157,9 +157,8 @@ cdef class PhraseMatcher:
"""Add a match-rule to the phrase-matcher. A match-rule consists of: an ID """Add a match-rule to the phrase-matcher. A match-rule consists of: an ID
key, an on_match callback, and one or more patterns. key, an on_match callback, and one or more patterns.
As of spaCy v2.2.2, PhraseMatcher.add supports the future API, which Since spaCy v2.2.2, PhraseMatcher.add takes a list of patterns as the
makes the patterns the second argument and a list (instead of a variable second argument, with the on_match callback as an optional keyword
number of arguments). The on_match callback becomes an optional keyword
argument. argument.
key (str): The match ID. key (str): The match ID.