mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Update Language API docs
This commit is contained in:
		
							parent
							
								
									0adadcb3f0
								
							
						
					
					
						commit
						e370332fb1
					
				|  | @ -4,7 +4,14 @@ include ../_includes/_mixins | ||||||
| 
 | 
 | ||||||
| p | p | ||||||
|     |  Usually you'll load this once per process as #[code nlp] and pass the |     |  Usually you'll load this once per process as #[code nlp] and pass the | ||||||
|     |  instance around your application. |     |  instance around your application. The #[code Language] class is created | ||||||
|  |     |  when you call #[+api("spacy#load") #[code spacy.load()]] and contains | ||||||
|  |     |  the shared vocabulary and #[+a("/usage/adding-languages") language data], | ||||||
|  |     |  optional model data loaded from a #[+a("/models") model package] or | ||||||
|  |     |  a path, and a #[+a("/usage/processing-pipelines") processing pipeline] | ||||||
|  |     |  containing components like the tagger or parser that are called on a | ||||||
|  |     |  document in order. You can also add your own processing pipeline | ||||||
|  |     |  components that take a #[code Doc] object, modify it and return it. | ||||||
| 
 | 
 | ||||||
| +h(2, "init") Language.__init__ | +h(2, "init") Language.__init__ | ||||||
|     +tag method |     +tag method | ||||||
|  | @ -12,9 +19,9 @@ p | ||||||
| p Initialise a #[code Language] object. | p Initialise a #[code Language] object. | ||||||
| 
 | 
 | ||||||
| +aside-code("Example"). | +aside-code("Example"). | ||||||
|  |     from spacy.vocab import Vocab | ||||||
|     from spacy.language import Language |     from spacy.language import Language | ||||||
|     nlp = Language(pipeline=['token_vectors', 'tags', |     nlp = Language(Vocab()) | ||||||
|                              'dependencies']) |  | ||||||
| 
 | 
 | ||||||
|     from spacy.lang.en import English |     from spacy.lang.en import English | ||||||
|     nlp = English() |     nlp = English() | ||||||
|  | @ -34,14 +41,6 @@ p Initialise a #[code Language] object. | ||||||
|             |  A function that takes text and returns a #[code Doc] object. |             |  A function that takes text and returns a #[code Doc] object. | ||||||
|             |  Usually a #[code Tokenizer]. |             |  Usually a #[code Tokenizer]. | ||||||
| 
 | 
 | ||||||
|     +row |  | ||||||
|         +cell #[code pipeline] |  | ||||||
|         +cell list |  | ||||||
|         +cell |  | ||||||
|             |  A list of annotation processes or IDs of annotation, processes, |  | ||||||
|             |  e.g. a #[code Tagger] object, or #[code 'tagger']. IDs are looked |  | ||||||
|             |  up in #[code Language.Defaults.factories]. |  | ||||||
| 
 |  | ||||||
|     +row |     +row | ||||||
|         +cell #[code meta] |         +cell #[code meta] | ||||||
|         +cell dict |         +cell dict | ||||||
|  | @ -54,6 +53,23 @@ p Initialise a #[code Language] object. | ||||||
|         +cell #[code Language] |         +cell #[code Language] | ||||||
|         +cell The newly constructed object. |         +cell The newly constructed object. | ||||||
| 
 | 
 | ||||||
|  | +infobox("Deprecation note", "⚠️") | ||||||
|  |     .o-block | ||||||
|  |         |  To make the processing pipelines and their components more | ||||||
|  |         |  transparent, the #[code pipeline] and #[code disable] arguments on | ||||||
|  |         |  initialisation are now deprecated. Instead, pipeline components can | ||||||
|  |         |  now be added, removed and rearranged using the new #[code Language] | ||||||
|  |         |  methods, for example #[+api("language#add_pipe") #[code add_pipe]] or | ||||||
|  |         |  #[+api("language#create_pipe") #[code create_pipe]]. This is also how | ||||||
|  |         |  #[+api("spacy#load") #[code spacy.load()]] creates the | ||||||
|  |         |  #[code Language] instance it returns. | ||||||
|  | 
 | ||||||
|  |     +code-new. | ||||||
|  |         nlp = English() | ||||||
|  |         parser = nlp.create_pipe('parser') | ||||||
|  |         nlp.add_pipe(parser) | ||||||
|  |     +code-old nlp = English(pipeline=['parser']) | ||||||
|  | 
 | ||||||
| +h(2, "call") Language.__call__ | +h(2, "call") Language.__call__ | ||||||
|     +tag method |     +tag method | ||||||
| 
 | 
 | ||||||
|  | @ -235,7 +251,6 @@ p | ||||||
|     |  Can be called before training to pre-process gold data. By default, it |     |  Can be called before training to pre-process gold data. By default, it | ||||||
|     |  handles nonprojectivity and adds missing tags to the tag map. |     |  handles nonprojectivity and adds missing tags to the tag map. | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| +table(["Name", "Type", "Description"]) | +table(["Name", "Type", "Description"]) | ||||||
|     +row |     +row | ||||||
|         +cell #[code docs_golds] |         +cell #[code docs_golds] | ||||||
|  | @ -247,6 +262,177 @@ p | ||||||
|         +cell tuple |         +cell tuple | ||||||
|         +cell Tuples of #[code Doc] and #[code GoldParse] objects. |         +cell Tuples of #[code Doc] and #[code GoldParse] objects. | ||||||
| 
 | 
 | ||||||
|  | +h(2, "create_pipe") Language.create_pipe | ||||||
|  |     +tag method | ||||||
|  |     +tag-new(2) | ||||||
|  | 
 | ||||||
|  | p Create a pipeline component from a factory. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     parser = nlp.create_pipe('parser') | ||||||
|  |     nlp.add_pipe(parser) | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +row | ||||||
|  |         +cell #[code name] | ||||||
|  |         +cell unicode | ||||||
|  |         +cell | ||||||
|  |             |  Factory name to look up in | ||||||
|  |             |  #[+api("language#class-attributes") #[code Language.factories]]. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code config] | ||||||
|  |         +cell dict | ||||||
|  |         +cell Configuration parameters to initialise component. | ||||||
|  | 
 | ||||||
|  |     +row("foot") | ||||||
|  |         +cell returns | ||||||
|  |         +cell callable | ||||||
|  |         +cell The pipeline component. | ||||||
|  | 
 | ||||||
|  | +h(2, "add_pipe") Language.add_pipe | ||||||
|  |     +tag method | ||||||
|  |     +tag-new(2) | ||||||
|  | 
 | ||||||
|  | p | ||||||
|  |     |  Add a component to the processing pipeline. Valid components are | ||||||
|  |     |  callables that take a #[code Doc] object, modify it and return it. Only | ||||||
|  |     |  one of #[code before], #[code after], #[code first] or #[code last] can | ||||||
|  |     |  be set. Default behaviour is #[code last=True]. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     def component(doc): | ||||||
|  |         # modify Doc and return it | ||||||
|  |         return doc | ||||||
|  | 
 | ||||||
|  |     nlp.add_pipe(component, before='ner') | ||||||
|  |     nlp.add_pipe(component, name='custom_name', last=True) | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +row | ||||||
|  |         +cell #[code component] | ||||||
|  |         +cell callable | ||||||
|  |         +cell The pipeline component. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code name] | ||||||
|  |         +cell unicode | ||||||
|  |         +cell | ||||||
|  |             |  Name of pipeline component. Overwrites existing | ||||||
|  |             |  #[code component.name] attribute if available. If no #[code name] | ||||||
|  |             |  is set and the component exposes no name attribute, | ||||||
|  |             |  #[code component.__name__] is used. An error is raised if the | ||||||
|  |             |  name already exists in the pipeline. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code before] | ||||||
|  |         +cell unicode | ||||||
|  |         +cell Component name to insert component directly before. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code after] | ||||||
|  |         +cell unicode | ||||||
|  |         +cell Component name to insert component directly after. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code first] | ||||||
|  |         +cell bool | ||||||
|  |         +cell Insert component first / not first in the pipeline. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code last] | ||||||
|  |         +cell bool | ||||||
|  |         +cell Insert component last / not last in the pipeline. | ||||||
|  | 
 | ||||||
|  | +h(2, "get_pipe") Language.get_pipe | ||||||
|  |     +tag method | ||||||
|  |     +tag-new(2) | ||||||
|  | 
 | ||||||
|  | p Get a pipeline component for a given component name. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     parser = nlp.get_pipe('parser') | ||||||
|  |     custom_component = nlp.get_pipe('custom_component') | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +row | ||||||
|  |         +cell #[code name] | ||||||
|  |         +cell unicode | ||||||
|  |         +cell Name of the pipeline component to get. | ||||||
|  | 
 | ||||||
|  |     +row("foot") | ||||||
|  |         +cell returns | ||||||
|  |         +cell callable | ||||||
|  |         +cell The pipeline component. | ||||||
|  | 
 | ||||||
|  | +h(2, "replace_pipe") Language.replace_pipe | ||||||
|  |     +tag method | ||||||
|  |     +tag-new(2) | ||||||
|  | 
 | ||||||
|  | p Replace a component in the pipeline. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     nlp.replace_pipe('parser', my_custom_parser) | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +row | ||||||
|  |         +cell #[code name] | ||||||
|  |         +cell unicode | ||||||
|  |         +cell Name of the component to replace. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code component] | ||||||
|  |         +cell callable | ||||||
|  |         +cell The pipeline component to insert. | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | +h(2, "rename_pipe") Language.rename_pipe | ||||||
|  |     +tag method | ||||||
|  |     +tag-new(2) | ||||||
|  | 
 | ||||||
|  | p | ||||||
|  |     |  Rename a component in the pipeline. Useful to create custom names for | ||||||
|  |     |  pre-defined and pre-loaded components. To change the default name of | ||||||
|  |     |  a component added to the pipeline, you can also use the #[code name] | ||||||
|  |     |  argument on #[+api("language#add_pipe") #[code add_pipe]]. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     nlp.rename_pipe('parser', 'spacy_parser') | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +row | ||||||
|  |         +cell #[code old_name] | ||||||
|  |         +cell unicode | ||||||
|  |         +cell Name of the component to rename. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code new_name] | ||||||
|  |         +cell unicode | ||||||
|  |         +cell New name of the component. | ||||||
|  | 
 | ||||||
|  | +h(2, "remove_pipe") Language.remove_pipe | ||||||
|  |     +tag method | ||||||
|  |     +tag-new(2) | ||||||
|  | 
 | ||||||
|  | p | ||||||
|  |     |  Remove a component from the pipeline. Returns the removed component name | ||||||
|  |     |  and component function. | ||||||
|  | 
 | ||||||
|  | +aside-code("Example"). | ||||||
|  |     name, component = nlp.remove_pipe('parser') | ||||||
|  |     assert name == 'parser' | ||||||
|  | 
 | ||||||
|  | +table(["Name", "Type", "Description"]) | ||||||
|  |     +row | ||||||
|  |         +cell #[code name] | ||||||
|  |         +cell unicode | ||||||
|  |         +cell Name of the component to remove. | ||||||
|  | 
 | ||||||
|  |     +row("foot") | ||||||
|  |         +cell returns | ||||||
|  |         +cell tuple | ||||||
|  |         +cell A #[code (name, component)] tuple of the removed component. | ||||||
|  | 
 | ||||||
| +h(2, "to_disk") Language.to_disk | +h(2, "to_disk") Language.to_disk | ||||||
|     +tag method |     +tag method | ||||||
|     +tag-new(2) |     +tag-new(2) | ||||||
|  | @ -399,7 +585,15 @@ p Load state from a binary string. | ||||||
|     +row |     +row | ||||||
|         +cell #[code pipeline] |         +cell #[code pipeline] | ||||||
|         +cell list |         +cell list | ||||||
|         +cell Sequence of annotation functions. |         +cell | ||||||
|  |             |  List of #[code (name, component)] tuples describing the current | ||||||
|  |             |  processing pipeline, in order. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code pipe_names] | ||||||
|  |             +tag-new(2) | ||||||
|  |         +cell list | ||||||
|  |         +cell List of pipeline component names, in order. | ||||||
| 
 | 
 | ||||||
|     +row |     +row | ||||||
|         +cell #[code meta] |         +cell #[code meta] | ||||||
|  | @ -424,3 +618,12 @@ p Load state from a binary string. | ||||||
|         +cell |         +cell | ||||||
|             |  Two-letter language ID, i.e. |             |  Two-letter language ID, i.e. | ||||||
|             |  #[+a("https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes") ISO code]. |             |  #[+a("https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes") ISO code]. | ||||||
|  | 
 | ||||||
|  |     +row | ||||||
|  |         +cell #[code factories] | ||||||
|  |             +tag-new(2) | ||||||
|  |         +cell dict | ||||||
|  |         +cell | ||||||
|  |             |  Factories that create pre-defined pipeline components, e.g. the | ||||||
|  |             |  tagger, parser or entity recognizer, keyed by their component | ||||||
|  |             |  name. | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user