mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			39 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			39 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| //- 💫 DOCS > USAGE > PROCESSING PIPELINES > SERIALIZATION
 | |
| 
 | |
| include ../_spacy-101/_serialization
 | |
| 
 | |
| +infobox("Important note")
 | |
|     |  In spaCy v2.0, the API for saving and loading has changed to only use the
 | |
|     |  four methods listed above consistently across objects and classes. For an
 | |
|     |  overview of the changes, see #[+a("/usage/v2#incompat") this table]
 | |
|     |  and the notes on #[+a("/usage/v2#migrating-saving-loading") migrating].
 | |
| 
 | |
| +h(3, "example-doc") Example: Saving and loading a document
 | |
| 
 | |
| p
 | |
|     |  For simplicity, let's assume you've
 | |
|     |  #[+a("/usage/linguistic-features#setting-entities") added custom entities] to
 | |
|     |  a #[code Doc], either manually, or by using a
 | |
|     |  #[+a("/usage/linguistic-features#on_match") match pattern]. You can
 | |
|     |  save it locally by calling #[+api("doc#to_disk") #[code Doc.to_disk()]],
 | |
|     |  and load it again via #[+api("doc#from_disk") #[code Doc.from_disk()]].
 | |
|     |  This will overwrite the existing object and return it.
 | |
| 
 | |
| +code.
 | |
|     import spacy
 | |
|     from spacy.tokens import Span
 | |
| 
 | |
|     text = u'Netflix is hiring a new VP of global policy'
 | |
| 
 | |
|     nlp = spacy.load('en')
 | |
|     doc = nlp(text)
 | |
|     assert len(doc.ents) == 0 # Doc has no entities
 | |
|     doc.ents += (Span(doc, 0, 1, label=doc.vocab.strings[u'ORG']),) # add entity
 | |
|     doc.to_disk('/path/to/doc') # save Doc to disk
 | |
| 
 | |
|     new_doc = nlp(text)
 | |
|     assert len(new_doc.ents) == 0 # new Doc has no entities
 | |
|     new_doc = new_doc.from_disk('/path/to/doc') # load from disk and overwrite
 | |
|     assert len(new_doc.ents) == 1 # entity is now recognised!
 | |
|     assert [(ent.text, ent.label_) for ent in new_doc.ents] == [(u'Netflix', u'ORG')]
 |