mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-22 02:44:15 +03:00
Remove some old version refs in the docs (#9448)
* Remove some old version refs in the docs
* Remove warning
* Update spacy/matcher/matcher.pyx
* Remove all references to the punctuation warning

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
parent
7b98aa4c16
commit
28ecf399da
|
@@ -419,7 +419,7 @@ simply click on the "Suggest edits" button at the bottom of a page.
|
||||||
## Publishing spaCy extensions and plugins
|
## Publishing spaCy extensions and plugins
|
||||||
|
|
||||||
We're very excited about all the new possibilities for **community extensions**
|
We're very excited about all the new possibilities for **community extensions**
|
||||||
and plugins in spaCy v2.0, and we can't wait to see what you build with it!
|
and plugins in spaCy v3.0, and we can't wait to see what you build with it!
|
||||||
|
|
||||||
- An extension or plugin should add substantial functionality, be
|
- An extension or plugin should add substantial functionality, be
|
||||||
**well-documented** and **open-source**. It should be available for users to download
|
**well-documented** and **open-source**. It should be available for users to download
|
||||||
|
|
|
@@ -203,7 +203,6 @@ def debug_data(
|
||||||
has_low_data_warning = False
|
has_low_data_warning = False
|
||||||
has_no_neg_warning = False
|
has_no_neg_warning = False
|
||||||
has_ws_ents_error = False
|
has_ws_ents_error = False
|
||||||
has_punct_ents_warning = False
|
|
||||||
|
|
||||||
msg.divider("Named Entity Recognition")
|
msg.divider("Named Entity Recognition")
|
||||||
msg.info(f"{len(model_labels)} label(s)")
|
msg.info(f"{len(model_labels)} label(s)")
|
||||||
|
@@ -230,10 +229,6 @@ def debug_data(
|
||||||
msg.fail(f"{gold_train_data['ws_ents']} invalid whitespace entity spans")
|
msg.fail(f"{gold_train_data['ws_ents']} invalid whitespace entity spans")
|
||||||
has_ws_ents_error = True
|
has_ws_ents_error = True
|
||||||
|
|
||||||
if gold_train_data["punct_ents"]:
|
|
||||||
msg.warn(f"{gold_train_data['punct_ents']} entity span(s) with punctuation")
|
|
||||||
has_punct_ents_warning = True
|
|
||||||
|
|
||||||
for label in labels:
|
for label in labels:
|
||||||
if label_counts[label] <= NEW_LABEL_THRESHOLD:
|
if label_counts[label] <= NEW_LABEL_THRESHOLD:
|
||||||
msg.warn(
|
msg.warn(
|
||||||
|
@@ -253,8 +248,6 @@ def debug_data(
|
||||||
msg.good("Examples without occurrences available for all labels")
|
msg.good("Examples without occurrences available for all labels")
|
||||||
if not has_ws_ents_error:
|
if not has_ws_ents_error:
|
||||||
msg.good("No entities consisting of or starting/ending with whitespace")
|
msg.good("No entities consisting of or starting/ending with whitespace")
|
||||||
if not has_punct_ents_warning:
|
|
||||||
msg.good("No entities consisting of or starting/ending with punctuation")
|
|
||||||
|
|
||||||
if has_low_data_warning:
|
if has_low_data_warning:
|
||||||
msg.text(
|
msg.text(
|
||||||
|
@@ -270,15 +263,9 @@ def debug_data(
|
||||||
show=verbose,
|
show=verbose,
|
||||||
)
|
)
|
||||||
if has_ws_ents_error:
|
if has_ws_ents_error:
|
||||||
msg.text(
|
|
||||||
"As of spaCy v2.1.0, entity spans consisting of or starting/ending "
|
|
||||||
"with whitespace characters are considered invalid."
|
|
||||||
)
|
|
||||||
|
|
||||||
if has_punct_ents_warning:
|
|
||||||
msg.text(
|
msg.text(
|
||||||
"Entity spans consisting of or starting/ending "
|
"Entity spans consisting of or starting/ending "
|
||||||
"with punctuation can not be trained with a noise level > 0."
|
"with whitespace characters are considered invalid."
|
||||||
)
|
)
|
||||||
|
|
||||||
if "textcat" in factory_names:
|
if "textcat" in factory_names:
|
||||||
|
@@ -578,7 +565,6 @@ def _compile_gold(
|
||||||
"words": Counter(),
|
"words": Counter(),
|
||||||
"roots": Counter(),
|
"roots": Counter(),
|
||||||
"ws_ents": 0,
|
"ws_ents": 0,
|
||||||
"punct_ents": 0,
|
|
||||||
"n_words": 0,
|
"n_words": 0,
|
||||||
"n_misaligned_words": 0,
|
"n_misaligned_words": 0,
|
||||||
"words_missing_vectors": Counter(),
|
"words_missing_vectors": Counter(),
|
||||||
|
@@ -613,16 +599,6 @@ def _compile_gold(
|
||||||
if label.startswith(("B-", "U-", "L-")) and doc[i].is_space:
|
if label.startswith(("B-", "U-", "L-")) and doc[i].is_space:
|
||||||
# "Illegal" whitespace entity
|
# "Illegal" whitespace entity
|
||||||
data["ws_ents"] += 1
|
data["ws_ents"] += 1
|
||||||
if label.startswith(("B-", "U-", "L-")) and doc[i].text in [
|
|
||||||
".",
|
|
||||||
"'",
|
|
||||||
"!",
|
|
||||||
"?",
|
|
||||||
",",
|
|
||||||
]:
|
|
||||||
# punctuation entity: could be replaced by whitespace when training with noise,
|
|
||||||
# so add a warning to alert the user to this unexpected side effect.
|
|
||||||
data["punct_ents"] += 1
|
|
||||||
if label.startswith(("B-", "U-")):
|
if label.startswith(("B-", "U-")):
|
||||||
combined_label = label.split("-")[1]
|
combined_label = label.split("-")[1]
|
||||||
data["ner"][combined_label] += 1
|
data["ner"][combined_label] += 1
|
||||||
|
|
|
@@ -96,10 +96,8 @@ cdef class Matcher:
|
||||||
by returning a non-overlapping set per key, either taking preference to
|
by returning a non-overlapping set per key, either taking preference to
|
||||||
the first greedy match ("FIRST"), or the longest ("LONGEST").
|
the first greedy match ("FIRST"), or the longest ("LONGEST").
|
||||||
|
|
||||||
As of spaCy v2.2.2, Matcher.add supports the future API, which makes
|
Since spaCy v2.2.2, Matcher.add takes a list of patterns as the second
|
||||||
the patterns the second argument and a list (instead of a variable
|
argument, and the on_match callback is an optional keyword argument.
|
||||||
number of arguments). The on_match callback becomes an optional keyword
|
|
||||||
argument.
|
|
||||||
|
|
||||||
key (Union[str, int]): The match ID.
|
key (Union[str, int]): The match ID.
|
||||||
patterns (list): The patterns to add for the given key.
|
patterns (list): The patterns to add for the given key.
|
||||||
|
|
|
@@ -157,9 +157,8 @@ cdef class PhraseMatcher:
|
||||||
"""Add a match-rule to the phrase-matcher. A match-rule consists of: an ID
|
"""Add a match-rule to the phrase-matcher. A match-rule consists of: an ID
|
||||||
key, an on_match callback, and one or more patterns.
|
key, an on_match callback, and one or more patterns.
|
||||||
|
|
||||||
As of spaCy v2.2.2, PhraseMatcher.add supports the future API, which
|
Since spaCy v2.2.2, PhraseMatcher.add takes a list of patterns as the
|
||||||
makes the patterns the second argument and a list (instead of a variable
|
second argument, with the on_match callback as an optional keyword
|
||||||
number of arguments). The on_match callback becomes an optional keyword
|
|
||||||
argument.
|
argument.
|
||||||
|
|
||||||
key (str): The match ID.
|
key (str): The match ID.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user