mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Update section on new v2.0 features
This commit is contained in:
		
							parent
							
								
									f4658ff053
								
							
						
					
					
						commit
						c25f3133ca
					
				|  | @ -8,6 +8,65 @@ p | |||
| 
 | ||||
| +h(2, "features") New features | ||||
| 
 | ||||
| +h(3, "features-pipelines") Improved processing pipelines | ||||
| 
 | ||||
| +aside-code("Example"). | ||||
|     # Modify an existing pipeline | ||||
|     nlp = spacy.load('en') | ||||
|     nlp.pipeline.append(my_component) | ||||
| 
 | ||||
|     # Register a factory to create a component | ||||
|     spacy.set_factory('my_factory', my_factory) | ||||
|     nlp = Language(pipeline=['my_factory', my_component]) | ||||
| 
 | ||||
| p | ||||
|     |  It's now much easier to customise the pipeline with your own components. | ||||
|     |  Components are functions that receive a #[code Doc] object, modify and | ||||
|     |  return it. If your component is stateful, you'll want to create a new one | ||||
|     |  for each pipeline. You can do that by defining and registering a factory | ||||
|     |  which receives the shared #[code Vocab] object and returns a component. | ||||
| 
 | ||||
| p | ||||
|     |  spaCy's default components – the vectorizer, tagger, parser and entity | ||||
|     |  recognizer – can be added to your pipeline by using their string IDs. | ||||
|     |  This way, you won't have to worry about finding and implementing them – | ||||
|     |  to use the default tagger, simply add #[code "tagger"] to the pipeline, | ||||
|     |  and spaCy will know what to do. | ||||
| 
 | ||||
| +infobox | ||||
|     |  #[strong API:] #[+api("language") #[code Language]] | ||||
|     |  #[strong Usage:] #[+a("/docs/usage/language-processing-pipeline") Processing text] | ||||
| 
 | ||||
| +h(3, "features-serializer") Saving, loading and serialization | ||||
| 
 | ||||
| +aside-code("Example"). | ||||
|     nlp = spacy.load('en') # shortcut link | ||||
|     nlp = spacy.load('en_core_web_sm') # package | ||||
|     nlp = spacy.load('/path/to/en') # unicode path | ||||
|     nlp = spacy.load(Path('/path/to/en')) # pathlib Path | ||||
| 
 | ||||
|     nlp.to_disk('/path/to/nlp') | ||||
|     nlp = English().from_disk('/path/to/nlp') | ||||
| 
 | ||||
| p | ||||
|     |  spaCy's serialization API has been made consistent across classes and | ||||
|     |  objects. All container classes and pipeline components now have | ||||
|     |  #[code to_bytes()], #[code from_bytes()], #[code to_disk()] and | ||||
|     |  #[code from_disk()] methods that support the Pickle protocol. | ||||
| 
 | ||||
| p | ||||
|     |  The improved #[code spacy.load] makes loading models easier and more | ||||
|     |  transparent. You can load a model by supplying its | ||||
|     |  #[+a("/docs/usage/models#usage") shortcut link], the name of an installed | ||||
|     |  #[+a("/docs/usage/saving-loading#generating") model package] or a path. | ||||
|     |  The #[code Language] class to initialise will be determined based on the | ||||
|     |  model's settings. For a blank language, you can import the class directly, | ||||
|     |  e.g. #[code from spacy.lang.en import English]. | ||||
| 
 | ||||
| +infobox | ||||
|     |  #[strong API:] #[+api("spacy#load") #[code spacy.load]], #[+api("binder") #[code Binder]] | ||||
|     |  #[strong Usage:] #[+a("/docs/usage/saving-loading") Saving and loading] | ||||
| 
 | ||||
| +h(3, "features-displacy") displaCy visualizer with Jupyter support | ||||
| 
 | ||||
| +aside-code("Example"). | ||||
|  | @ -28,33 +87,6 @@ p | |||
|     |  #[strong API:] #[+api("displacy") #[code displacy]] | ||||
|     |  #[strong Usage:] #[+a("/docs/usage/visualizers") Visualizing spaCy] | ||||
| 
 | ||||
| +h(3, "features-loading") Loading | ||||
| 
 | ||||
| +aside-code("Example"). | ||||
|     nlp = spacy.load('en') # shortcut link | ||||
|     nlp = spacy.load('en_core_web_sm') # package | ||||
|     nlp = spacy.load('/path/to/en') # unicode path | ||||
|     nlp = spacy.load(Path('/path/to/en')) # pathlib Path | ||||
| 
 | ||||
| p | ||||
|     |  The improved #[code spacy.load] makes loading models easier and more | ||||
|     |  transparent. You can load a model by supplying its | ||||
|     |  #[+a("/docs/usage/models#usage") shortcut link], the name of an installed | ||||
|     |  #[+a("/docs/usage/saving-loading#generating") model package], a unicode | ||||
|     |  path or a #[code Path]-like object. spaCy will try resolving the load | ||||
|     |  argument in this order. The #[code path] keyword argument is now deprecated. | ||||
| 
 | ||||
| p | ||||
|     |  The #[code Language] class to initialise will be determined based on the | ||||
|     |  model's settings. If no model is found, spaCy will let you know and won't | ||||
|     |  just return an empty #[code Language] object anymore. If you want a blank | ||||
|     |  language, you can always import the class directly, e.g. | ||||
|     |  #[code from spacy.lang.en import English]. | ||||
| 
 | ||||
| +infobox | ||||
|     |  #[strong API:] #[+api("spacy#load") #[code spacy.load]] | ||||
|     |  #[strong Usage:] #[+a("/docs/usage/saving-loading") Saving and loading] | ||||
| 
 | ||||
| +h(3, "features-language") Improved language data and lazy loading | ||||
| 
 | ||||
| p | ||||
|  | @ -65,46 +97,15 @@ p | |||
|     |  complex regular expressions. The language data has also been tidied up | ||||
|     |  and simplified. It's now also possible to overwrite the functions that | ||||
|     |  compute lexical attributes like #[code like_num], and supply | ||||
|     |  language-specific syntax iterators, e.g. to determine noun chunks. | ||||
|     |  language-specific syntax iterators, e.g. to determine noun chunks. spaCy | ||||
|     |  now also supports simple lookup-based lemmatization. The data is stored | ||||
|     |  in a dictionary mapping a string to its lemma. | ||||
| 
 | ||||
| +infobox | ||||
|     |  #[strong API:] #[+api("language") #[code Language]] | ||||
|     |  #[strong Code:] #[+src(gh("spaCy", "spacy/lang")) spacy/lang] | ||||
|     |  #[strong Usage:] #[+a("/docs/usage/adding-languages") Adding languages] | ||||
| 
 | ||||
| +h(3, "features-pipelines") Improved processing pipelines | ||||
| 
 | ||||
| +aside-code("Example"). | ||||
|     from spacy.language import Language | ||||
|     nlp = Language(pipeline=['token_vectors', 'tags', | ||||
|                              'dependencies']) | ||||
| 
 | ||||
| +infobox | ||||
|     |  #[strong API:] #[+api("language") #[code Language]] | ||||
|     |  #[strong Usage:] #[+a("/docs/usage/processing-text") Processing text] | ||||
| 
 | ||||
| +h(3, "features-lemmatizer") Simple lookup-based lemmatization | ||||
| 
 | ||||
| +aside-code("Example"). | ||||
|     LOOKUP = { | ||||
|         "aba": "abar", | ||||
|         "ababa": "abar", | ||||
|         "ababais": "abar", | ||||
|         "ababan": "abar", | ||||
|         "ababanes": "ababán" | ||||
|     } | ||||
| 
 | ||||
| p | ||||
|     |  spaCy now supports simple lookup-based lemmatization. The data is stored | ||||
|     |  in a dictionary mapping a string to its lemma. To determine a token's | ||||
|     |  lemma, spaCy simply looks it up in the table. The lookup lemmatizer can | ||||
|     |  be imported from #[code spacy.lemmatizerlookup]. It's initialised with | ||||
|     |  the lookup table, and should be returned by the #[code create_lemmatizer] | ||||
|     |  classmethod of the language's defaults. | ||||
| 
 | ||||
| +infobox | ||||
|     |  #[strong API:] #[+api("language") #[code Language]] | ||||
|     |  #[strong Usage:] #[+a("/docs/usage/adding-languages") Adding languages] | ||||
| 
 | ||||
| +h(3, "features-matcher") Revised matcher API | ||||
| 
 | ||||
| +aside-code("Example"). | ||||
|  | @ -129,12 +130,6 @@ p | |||
|     |  #[strong API:] #[+api("matcher") #[code Matcher]] | ||||
|     |  #[strong Usage:] #[+a("/docs/usage/rule-based-matching") Rule-based matching] | ||||
| 
 | ||||
| +h(3, "features-serializer") Serialization | ||||
| 
 | ||||
| +infobox | ||||
|     |  #[strong API:] #[+api("serializer") #[code Serializer]] | ||||
|     |  #[strong Usage:] #[+a("/docs/usage/saving-loading") Saving and loading] | ||||
| 
 | ||||
| +h(3, "features-models") Neural network models for English, German, French and Spanish | ||||
| 
 | ||||
| +infobox | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user