Remove some old version refs in the docs (#9448)

* Remove some old version refs in the docs

* Remove warning

* Update spacy/matcher/matcher.pyx

* Remove all references to the punctuation warning

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
Paul O'Leary McCann 2021-10-21 09:17:59 +00:00 committed by GitHub
parent 7b98aa4c16
commit 28ecf399da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 6 additions and 33 deletions

View File

@@ -419,7 +419,7 @@ simply click on the "Suggest edits" button at the bottom of a page.
## Publishing spaCy extensions and plugins
We're very excited about all the new possibilities for **community extensions**
-and plugins in spaCy v2.0, and we can't wait to see what you build with it!
+and plugins in spaCy v3.0, and we can't wait to see what you build with it!
- An extension or plugin should add substantial functionality, be
**well-documented** and **open-source**. It should be available for users to download

View File

@@ -203,7 +203,6 @@ def debug_data(
has_low_data_warning = False
has_no_neg_warning = False
has_ws_ents_error = False
-has_punct_ents_warning = False
msg.divider("Named Entity Recognition")
msg.info(f"{len(model_labels)} label(s)")
@@ -230,10 +229,6 @@ def debug_data(
msg.fail(f"{gold_train_data['ws_ents']} invalid whitespace entity spans")
has_ws_ents_error = True
-if gold_train_data["punct_ents"]:
-msg.warn(f"{gold_train_data['punct_ents']} entity span(s) with punctuation")
-has_punct_ents_warning = True
for label in labels:
if label_counts[label] <= NEW_LABEL_THRESHOLD:
msg.warn(
@@ -253,8 +248,6 @@ def debug_data(
msg.good("Examples without occurrences available for all labels")
if not has_ws_ents_error:
msg.good("No entities consisting of or starting/ending with whitespace")
-if not has_punct_ents_warning:
-msg.good("No entities consisting of or starting/ending with punctuation")
if has_low_data_warning:
msg.text(
@@ -270,15 +263,9 @@ def debug_data(
show=verbose,
)
if has_ws_ents_error:
msg.text(
-"As of spaCy v2.1.0, entity spans consisting of or starting/ending "
-"with whitespace characters are considered invalid."
-)
-if has_punct_ents_warning:
-msg.text(
"Entity spans consisting of or starting/ending "
-"with punctuation can not be trained with a noise level > 0."
+"with whitespace characters are considered invalid."
)
if "textcat" in factory_names:
@@ -578,7 +565,6 @@ def _compile_gold(
"words": Counter(),
"roots": Counter(),
"ws_ents": 0,
-"punct_ents": 0,
"n_words": 0,
"n_misaligned_words": 0,
"words_missing_vectors": Counter(),
@@ -613,16 +599,6 @@ def _compile_gold(
if label.startswith(("B-", "U-", "L-")) and doc[i].is_space:
# "Illegal" whitespace entity
data["ws_ents"] += 1
-if label.startswith(("B-", "U-", "L-")) and doc[i].text in [
-".",
-"'",
-"!",
-"?",
-",",
-]:
-# punctuation entity: could be replaced by whitespace when training with noise,
-# so add a warning to alert the user to this unexpected side effect.
-data["punct_ents"] += 1
if label.startswith(("B-", "U-")):
combined_label = label.split("-")[1]
data["ner"][combined_label] += 1

View File

@@ -96,10 +96,8 @@ cdef class Matcher:
by returning a non-overlapping set per key, either taking preference to
the first greedy match ("FIRST"), or the longest ("LONGEST").
-As of spaCy v2.2.2, Matcher.add supports the future API, which makes
-the patterns the second argument and a list (instead of a variable
-number of arguments). The on_match callback becomes an optional keyword
-argument.
+Since spaCy v2.2.2, Matcher.add takes a list of patterns as the second
+argument, and the on_match callback is an optional keyword argument.
key (Union[str, int]): The match ID.
patterns (list): The patterns to add for the given key.

View File

@@ -157,9 +157,8 @@ cdef class PhraseMatcher:
"""Add a match-rule to the phrase-matcher. A match-rule consists of: an ID
key, an on_match callback, and one or more patterns.
-As of spaCy v2.2.2, PhraseMatcher.add supports the future API, which
-makes the patterns the second argument and a list (instead of a variable
-number of arguments). The on_match callback becomes an optional keyword
+Since spaCy v2.2.2, PhraseMatcher.add takes a list of patterns as the
+second argument, with the on_match callback as an optional keyword
argument.
key (str): The match ID.