Mirror of https://github.com/explosion/spaCy.git, synced 2025-10-31 16:07:41 +03:00
	Update section on new v2.0 features
This commit is contained in:
commit c25f3133ca
parent f4658ff053

@@ -8,6 +8,65 @@ p
 
 +h(2, "features") New features
 
++h(3, "features-pipelines") Improved processing pipelines
+
++aside-code("Example").
+    # Modify an existing pipeline
+    nlp = spacy.load('en')
+    nlp.pipeline.append(my_component)
+
+    # Register a factory to create a component
+    spacy.set_factory('my_factory', my_factory)
+    nlp = Language(pipeline=['my_factory', my_component])
+
+p
+    |  It's now much easier to customise the pipeline with your own components.
+    |  Components are functions that receive a #[code Doc] object, modify and
+    |  return it. If your component is stateful, you'll want to create a new one
+    |  for each pipeline. You can do that by defining and registering a factory
+    |  which receives the shared #[code Vocab] object and returns a component.
+
+p
+    |  spaCy's default components – the vectorizer, tagger, parser and entity
+    |  recognizer – can be added to your pipeline by using their string IDs.
+    |  This way, you won't have to worry about finding and implementing them –
+    |  to use the default tagger, simply add #[code "tagger"] to the pipeline,
+    |  and spaCy will know what to do.
+
++infobox
+    |  #[strong API:] #[+api("language") #[code Language]]
+    |  #[strong Usage:] #[+a("/docs/usage/language-processing-pipeline") Processing text]
+
++h(3, "features-serializer") Saving, loading and serialization
+
++aside-code("Example").
+    nlp = spacy.load('en') # shortcut link
+    nlp = spacy.load('en_core_web_sm') # package
+    nlp = spacy.load('/path/to/en') # unicode path
+    nlp = spacy.load(Path('/path/to/en')) # pathlib Path
+
+    nlp.to_disk('/path/to/nlp')
+    nlp = English().from_disk('/path/to/nlp')
+
+p
+    |  spaCy's serialization API has been made consistent across classes and
+    |  objects. All container classes and pipeline components now have
+    |  #[code to_bytes()], #[code from_bytes()], #[code to_disk()] and
+    |  #[code from_disk()] methods that support the Pickle protocol.
+
+p
+    |  The improved #[code spacy.load] makes loading models easier and more
+    |  transparent. You can load a model by supplying its
+    |  #[+a("/docs/usage/models#usage") shortcut link], the name of an installed
+    |  #[+a("/docs/usage/saving-loading#generating") model package] or a path.
+    |  The #[code Language] class to initialise will be determined based on the
+    |  model's settings. For a blank language, you can import the class directly,
+    |  e.g. #[code from spacy.lang.en import English].
+
++infobox
+    |  #[strong API:] #[+api("spacy#load") #[code spacy.load]], #[+api("binder") #[code Binder]]
+    |  #[strong Usage:] #[+a("/docs/usage/saving-loading") Saving and loading]
+
 +h(3, "features-displacy") displaCy visualizer with Jupyter support
 
 +aside-code("Example").
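The aside in the hunk above only lists the calls, so here is a slightly fuller sketch of how a custom component and a factory could fit together under the pipeline API described in the added text. The component and factory names and the token-count print are illustrative assumptions, not part of the commit.

    import spacy
    from spacy.language import Language

    # A pipeline component is just a callable that receives a Doc,
    # modifies it and returns it.
    def my_component(doc):
        print('Processing %d tokens' % len(doc))  # illustrative only
        return doc

    nlp = spacy.load('en')
    nlp.pipeline.append(my_component)
    doc = nlp(u'This is a sentence.')

    # A stateful component should come from a factory, which receives the
    # shared Vocab object and returns a fresh component for each pipeline.
    def my_factory(vocab):
        def component(doc):
            # consult vocab or component-local state here
            return doc
        return component

    spacy.set_factory('my_factory', my_factory)
    nlp = Language(pipeline=['my_factory', my_component])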
@@ -28,33 +87,6 @@ p
     |  #[strong API:] #[+api("displacy") #[code displacy]]
     |  #[strong Usage:] #[+a("/docs/usage/visualizers") Visualizing spaCy]
 
-+h(3, "features-loading") Loading
-
-+aside-code("Example").
-    nlp = spacy.load('en') # shortcut link
-    nlp = spacy.load('en_core_web_sm') # package
-    nlp = spacy.load('/path/to/en') # unicode path
-    nlp = spacy.load(Path('/path/to/en')) # pathlib Path
-
-p
-    |  The improved #[code spacy.load] makes loading models easier and more
-    |  transparent. You can load a model by supplying its
-    |  #[+a("/docs/usage/models#usage") shortcut link], the name of an installed
-    |  #[+a("/docs/usage/saving-loading#generating") model package], a unicode
-    |  path or a #[code Path]-like object. spaCy will try resolving the load
-    |  argument in this order. The #[code path] keyword argument is now deprecated.
-
-p
-    |  The #[code Language] class to initialise will be determined based on the
-    |  model's settings. If no model is found, spaCy will let you know and won't
-    |  just return an empty #[code Language] object anymore. If you want a blank
-    |  language, you can always import the class directly, e.g.
-    |  #[code from spacy.lang.en import English].
-
-+infobox
-    |  #[strong API:] #[+api("spacy#load") #[code spacy.load]]
-    |  #[strong Usage:] #[+a("/docs/usage/saving-loading") Saving and loading]
-
 +h(3, "features-language") Improved language data and lazy loading
 
 p
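As a companion to the merged "Saving, loading and serialization" section added above, this is a minimal round-trip sketch under the API the text describes; the paths are placeholders, and the Doc round trip at the end is an assumption based on the claim that all container classes expose the byte-level methods.

    import spacy
    from spacy.lang.en import English
    from spacy.tokens import Doc

    # spacy.load resolves a shortcut link, package name or path and picks
    # the Language class from the model's settings.
    nlp = spacy.load('en')

    # Whole-pipeline round trip via to_disk() / from_disk() (placeholder path).
    nlp.to_disk('/path/to/nlp')
    nlp = English().from_disk('/path/to/nlp')

    # Containers expose matching byte-level methods.
    doc = nlp(u'This is a sentence.')
    data = doc.to_bytes()
    doc2 = Doc(nlp.vocab).from_bytes(data)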
@@ -65,46 +97,15 @@ p
     |  complex regular expressions. The language data has also been tidied up
     |  and simplified. It's now also possible to overwrite the functions that
     |  compute lexical attributes like #[code like_num], and supply
-    |  language-specific syntax iterators, e.g. to determine noun chunks.
+    |  language-specific syntax iterators, e.g. to determine noun chunks. spaCy
+    |  now also supports simple lookup-based lemmatization. The data is stored
+    |  in a dictionary mapping a string to its lemma.
 
 +infobox
+    |  #[strong API:] #[+api("language") #[code Language]]
     |  #[strong Code:] #[+src(gh("spaCy", "spacy/lang")) spacy/lang]
     |  #[strong Usage:] #[+a("/docs/usage/adding-languages") Adding languages]
 
-+h(3, "features-pipelines") Improved processing pipelines
-
-+aside-code("Example").
-    from spacy.language import Language
-    nlp = Language(pipeline=['token_vectors', 'tags',
-                             'dependencies'])
-
-+infobox
-    |  #[strong API:] #[+api("language") #[code Language]]
-    |  #[strong Usage:] #[+a("/docs/usage/processing-text") Processing text]
-
-+h(3, "features-lemmatizer") Simple lookup-based lemmatization
-
-+aside-code("Example").
-    LOOKUP = {
-        "aba": "abar",
-        "ababa": "abar",
-        "ababais": "abar",
-        "ababan": "abar",
-        "ababanes": "ababán"
-    }
-
-p
-    |  spaCy now supports simple lookup-based lemmatization. The data is stored
-    |  in a dictionary mapping a string to its lemma. To determine a token's
-    |  lemma, spaCy simply looks it up in the table. The lookup lemmatizer can
-    |  be imported from #[code spacy.lemmatizerlookup]. It's initialised with
-    |  the lookup table, and should be returned by the #[code create_lemmatizer]
-    |  classmethod of the language's defaults.
-
-+infobox
-    |  #[strong API:] #[+api("language") #[code Language]]
-    |  #[strong Usage:] #[+a("/docs/usage/adding-languages") Adding languages]
-
 +h(3, "features-matcher") Revised matcher API
 
 +aside-code("Example").
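The sentence added above only says that lookup lemmatization stores a dictionary mapping a string to its lemma, so here is a tiny sketch of that idea, reusing entries from the Spanish table removed in the same hunk; the lemmatize helper and its fall-back to the original string are assumptions for illustration.

    # Lookup table mapping a token's string to its lemma
    # (entries taken from the removed Spanish example above).
    LOOKUP = {
        "aba": "abar",
        "ababa": "abar",
        "ababais": "abar",
        "ababan": "abar",
    }

    def lemmatize(string):
        # Fall back to the string itself when there is no entry
        # (fall-back behaviour assumed, not specified in the docs).
        return LOOKUP.get(string, string)

    print(lemmatize("ababa"))   # 'abar'
    print(lemmatize("gatos"))   # 'gatos', no table entry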
@@ -129,12 +130,6 @@ p
     |  #[strong API:] #[+api("matcher") #[code Matcher]]
     |  #[strong Usage:] #[+a("/docs/usage/rule-based-matching") Rule-based matching]
 
-+h(3, "features-serializer") Serialization
-
-+infobox
-    |  #[strong API:] #[+api("serializer") #[code Serializer]]
-    |  #[strong Usage:] #[+a("/docs/usage/saving-loading") Saving and loading]
-
 +h(3, "features-models") Neural network models for English, German, French and Spanish
 
 +infobox