spaCy/website/usage/_processing-pipelines/_serialization.jade

//- 💫 DOCS > USAGE > PROCESSING PIPELINES > SERIALIZATION

include ../_spacy-101/_serialization

+infobox("Important note")
    |  In spaCy v2.0, the API for saving and loading has changed to only use the
    |  four methods listed above consistently across objects and classes. For an
    |  overview of the changes, see #[+a("/usage/v2#incompat") this table]
    |  and the notes on #[+a("/usage/v2#migrating-saving-loading") migrating].

+h(3, "example-doc") Example: Saving and loading a document

p
    |  For simplicity, let's assume you've
    |  #[+a("/usage/linguistic-features#setting-entities") added custom entities] to
    |  a #[code Doc], either manually, or by using a
    |  #[+a("/usage/linguistic-features#on_match") match pattern]. You can
    |  save it locally by calling #[+api("doc#to_disk") #[code Doc.to_disk()]],
    |  and load it again via #[+api("doc#from_disk") #[code Doc.from_disk()]].
    |  This will overwrite the existing object and return it.

+code.
    import spacy
    from spacy.tokens import Span

    text = u'Netflix is hiring a new VP of global policy'

    nlp = spacy.load('en')
    doc = nlp(text)
    assert len(doc.ents) == 0 # Doc has no entities
    doc.ents += ((Span(doc, 0, 1, label=doc.vocab.strings[u'ORG'])) # add entity
    doc.to_disk('/path/to/doc') # save Doc to disk

    new_doc = nlp(text)
    assert len(new_doc.ents) == 0 # new Doc has no entities
    new_doc = new_doc.from_disk('path/to/doc') # load from disk and overwrite
    assert len(new_doc.ents) == 1 # entity is now recognised!
    assert [(ent.text, ent.label_) for ent in new_doc.ents] == [(u'Netflix', u'ORG')]