mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-29 23:17:59 +03:00 
			
		
		
		
	Remove some old version refs in the docs (#9448)
* Remove some old version refs in the docs
* Remove warning
* Update spacy/matcher/matcher.pyx
* Remove all references to the punctuation warning

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									7b98aa4c16
								
							
						
					
					
						commit
						28ecf399da
					
				|  | @ -419,7 +419,7 @@ simply click on the "Suggest edits" button at the bottom of a page. | ||||||
| ## Publishing spaCy extensions and plugins | ## Publishing spaCy extensions and plugins | ||||||
| 
 | 
 | ||||||
| We're very excited about all the new possibilities for **community extensions** | We're very excited about all the new possibilities for **community extensions** | ||||||
| and plugins in spaCy v2.0, and we can't wait to see what you build with it! | and plugins in spaCy v3.0, and we can't wait to see what you build with it! | ||||||
| 
 | 
 | ||||||
| - An extension or plugin should add substantial functionality, be | - An extension or plugin should add substantial functionality, be | ||||||
|   **well-documented** and **open-source**. It should be available for users to download |   **well-documented** and **open-source**. It should be available for users to download | ||||||
|  |  | ||||||
|  | @ -203,7 +203,6 @@ def debug_data( | ||||||
|         has_low_data_warning = False |         has_low_data_warning = False | ||||||
|         has_no_neg_warning = False |         has_no_neg_warning = False | ||||||
|         has_ws_ents_error = False |         has_ws_ents_error = False | ||||||
|         has_punct_ents_warning = False |  | ||||||
| 
 | 
 | ||||||
|         msg.divider("Named Entity Recognition") |         msg.divider("Named Entity Recognition") | ||||||
|         msg.info(f"{len(model_labels)} label(s)") |         msg.info(f"{len(model_labels)} label(s)") | ||||||
|  | @ -230,10 +229,6 @@ def debug_data( | ||||||
|             msg.fail(f"{gold_train_data['ws_ents']} invalid whitespace entity spans") |             msg.fail(f"{gold_train_data['ws_ents']} invalid whitespace entity spans") | ||||||
|             has_ws_ents_error = True |             has_ws_ents_error = True | ||||||
| 
 | 
 | ||||||
|         if gold_train_data["punct_ents"]: |  | ||||||
|             msg.warn(f"{gold_train_data['punct_ents']} entity span(s) with punctuation") |  | ||||||
|             has_punct_ents_warning = True |  | ||||||
| 
 |  | ||||||
|         for label in labels: |         for label in labels: | ||||||
|             if label_counts[label] <= NEW_LABEL_THRESHOLD: |             if label_counts[label] <= NEW_LABEL_THRESHOLD: | ||||||
|                 msg.warn( |                 msg.warn( | ||||||
|  | @ -253,8 +248,6 @@ def debug_data( | ||||||
|             msg.good("Examples without occurrences available for all labels") |             msg.good("Examples without occurrences available for all labels") | ||||||
|         if not has_ws_ents_error: |         if not has_ws_ents_error: | ||||||
|             msg.good("No entities consisting of or starting/ending with whitespace") |             msg.good("No entities consisting of or starting/ending with whitespace") | ||||||
|         if not has_punct_ents_warning: |  | ||||||
|             msg.good("No entities consisting of or starting/ending with punctuation") |  | ||||||
| 
 | 
 | ||||||
|         if has_low_data_warning: |         if has_low_data_warning: | ||||||
|             msg.text( |             msg.text( | ||||||
|  | @ -270,15 +263,9 @@ def debug_data( | ||||||
|                 show=verbose, |                 show=verbose, | ||||||
|             ) |             ) | ||||||
|         if has_ws_ents_error: |         if has_ws_ents_error: | ||||||
|             msg.text( |  | ||||||
|                 "As of spaCy v2.1.0, entity spans consisting of or starting/ending " |  | ||||||
|                 "with whitespace characters are considered invalid." |  | ||||||
|             ) |  | ||||||
| 
 |  | ||||||
|         if has_punct_ents_warning: |  | ||||||
|             msg.text( |             msg.text( | ||||||
|                 "Entity spans consisting of or starting/ending " |                 "Entity spans consisting of or starting/ending " | ||||||
|                 "with punctuation can not be trained with a noise level > 0." |                 "with whitespace characters are considered invalid." | ||||||
|             ) |             ) | ||||||
| 
 | 
 | ||||||
|     if "textcat" in factory_names: |     if "textcat" in factory_names: | ||||||
|  | @ -578,7 +565,6 @@ def _compile_gold( | ||||||
|         "words": Counter(), |         "words": Counter(), | ||||||
|         "roots": Counter(), |         "roots": Counter(), | ||||||
|         "ws_ents": 0, |         "ws_ents": 0, | ||||||
|         "punct_ents": 0, |  | ||||||
|         "n_words": 0, |         "n_words": 0, | ||||||
|         "n_misaligned_words": 0, |         "n_misaligned_words": 0, | ||||||
|         "words_missing_vectors": Counter(), |         "words_missing_vectors": Counter(), | ||||||
|  | @ -613,16 +599,6 @@ def _compile_gold( | ||||||
|                 if label.startswith(("B-", "U-", "L-")) and doc[i].is_space: |                 if label.startswith(("B-", "U-", "L-")) and doc[i].is_space: | ||||||
|                     # "Illegal" whitespace entity |                     # "Illegal" whitespace entity | ||||||
|                     data["ws_ents"] += 1 |                     data["ws_ents"] += 1 | ||||||
|                 if label.startswith(("B-", "U-", "L-")) and doc[i].text in [ |  | ||||||
|                     ".", |  | ||||||
|                     "'", |  | ||||||
|                     "!", |  | ||||||
|                     "?", |  | ||||||
|                     ",", |  | ||||||
|                 ]: |  | ||||||
|                     # punctuation entity: could be replaced by whitespace when training with noise, |  | ||||||
|                     # so add a warning to alert the user to this unexpected side effect. |  | ||||||
|                     data["punct_ents"] += 1 |  | ||||||
|                 if label.startswith(("B-", "U-")): |                 if label.startswith(("B-", "U-")): | ||||||
|                     combined_label = label.split("-")[1] |                     combined_label = label.split("-")[1] | ||||||
|                     data["ner"][combined_label] += 1 |                     data["ner"][combined_label] += 1 | ||||||
|  |  | ||||||
|  | @ -96,10 +96,8 @@ cdef class Matcher: | ||||||
|         by returning a non-overlapping set per key, either taking preference to |         by returning a non-overlapping set per key, either taking preference to | ||||||
|         the first greedy match ("FIRST"), or the longest ("LONGEST"). |         the first greedy match ("FIRST"), or the longest ("LONGEST"). | ||||||
| 
 | 
 | ||||||
|         As of spaCy v2.2.2, Matcher.add supports the future API, which makes |         Since spaCy v2.2.2, Matcher.add takes a list of patterns as the second | ||||||
|         the patterns the second argument and a list (instead of a variable |         argument, and the on_match callback is an optional keyword argument. | ||||||
|         number of arguments). The on_match callback becomes an optional keyword |  | ||||||
|         argument. |  | ||||||
| 
 | 
 | ||||||
|         key (Union[str, int]): The match ID. |         key (Union[str, int]): The match ID. | ||||||
|         patterns (list): The patterns to add for the given key. |         patterns (list): The patterns to add for the given key. | ||||||
|  |  | ||||||
|  | @ -157,9 +157,8 @@ cdef class PhraseMatcher: | ||||||
|         """Add a match-rule to the phrase-matcher. A match-rule consists of: an ID |         """Add a match-rule to the phrase-matcher. A match-rule consists of: an ID | ||||||
|         key, an on_match callback, and one or more patterns. |         key, an on_match callback, and one or more patterns. | ||||||
| 
 | 
 | ||||||
|         As of spaCy v2.2.2, PhraseMatcher.add supports the future API, which |         Since spaCy v2.2.2, PhraseMatcher.add takes a list of patterns as the | ||||||
|         makes the patterns the second argument and a list (instead of a variable |         second argument, with the on_match callback as an optional keyword | ||||||
|         number of arguments). The on_match callback becomes an optional keyword |  | ||||||
|         argument. |         argument. | ||||||
| 
 | 
 | ||||||
|         key (str): The match ID. |         key (str): The match ID. | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user