mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
39 lines
1.6 KiB
Plaintext
39 lines
1.6 KiB
Plaintext
//- 💫 DOCS > USAGE > PROCESSING PIPELINES > SERIALIZATION
|
|
|
|
include ../_spacy-101/_serialization
|
|
|
|
+infobox("Important note")
|
|
| In spaCy v2.0, the API for saving and loading has changed to only use the
|
|
| four methods listed above consistently across objects and classes. For an
|
|
| overview of the changes, see #[+a("/usage/v2#incompat") this table]
|
|
| and the notes on #[+a("/usage/v2#migrating-saving-loading") migrating].
|
|
|
|
+h(3, "example-doc") Example: Saving and loading a document
|
|
|
|
p
|
|
| For simplicity, let's assume you've
|
|
| #[+a("/usage/entity-recognition#setting") added custom entities] to
|
|
| a #[code Doc], either manually, or by using a
|
|
| #[+a("/usage/rule-based-matching#on_match") match pattern]. You can
|
|
| save it locally by calling #[+api("doc#to_disk") #[code Doc.to_disk()]],
|
|
| and load it again via #[+api("doc#from_disk") #[code Doc.from_disk()]].
|
|
| This will overwrite the existing object and return it.
|
|
|
|
+code.
|
|
import spacy
|
|
from spacy.tokens import Span
|
|
|
|
text = u'Netflix is hiring a new VP of global policy'
|
|
|
|
nlp = spacy.load('en')
|
|
doc = nlp(text)
|
|
assert len(doc.ents) == 0 # Doc has no entities
|
|
doc.ents += (Span(doc, 0, 1, label=doc.vocab.strings[u'ORG']),) # add entity
|
|
doc.to_disk('/path/to/doc') # save Doc to disk
|
|
|
|
new_doc = nlp(text)
|
|
assert len(new_doc.ents) == 0 # new Doc has no entities
|
|
new_doc = new_doc.from_disk('/path/to/doc') # load from disk and overwrite
|
|
assert len(new_doc.ents) == 1 # entity is now recognised!
|
|
assert [(ent.text, ent.label_) for ent in new_doc.ents] == [(u'Netflix', u'ORG')]
|