spaCy/website/docs/api/spacy.jade

//- 💫 DOCS > API > SPACY

include ../../_includes/_mixins

+h(2, "load") spacy.load
    +tag function
    +tag-model

p
    |  Load a model via its #[+a("/docs/usage/models#usage") shortcut link],
    |  the name of an installed
    |  #[+a("/docs/usage/saving-loading#generating") model package], a unicode
    |  path or a #[code Path]-like object. spaCy will try resolving the load
    |  argument in this order. If a model is loaded from a shortcut link or
    |  package name, spaCy will assume it's a Python package, import it and
    |  call the model's own #[code load()] method. If a model is loaded from a
    |  path, spaCy will assume it's a data directory, read the language and
    |  pipeline settings from #[code meta.json] and initialise the #[code Language]
    |  class. The data will be loaded in via
    |  #[+api("language#from_disk") #[code Language.from_disk()]].

+aside-code("Example").
    nlp = spacy.load('en') # shortcut link
    nlp = spacy.load('en_core_web_sm') # package
    nlp = spacy.load('/path/to/en') # unicode path
    nlp = spacy.load(Path('/path/to/en')) # pathlib Path

    nlp = spacy.load('en', disable=['parser', 'tagger'])

+table(["Name", "Type", "Description"])
    +row
        +cell #[code name]
        +cell unicode or #[code Path]
        +cell Model to load, i.e. shortcut link, package name or path.

    +row
        +cell #[code disable]
        +cell list
        +cell
            |  Names of pipeline components to
            |  #[+a("/docs/usage/language-processing-pipeline#disabling") disable].

    +footrow
        +cell returns
        +cell #[code Language]
        +cell A #[code Language] object with the loaded model.

+infobox("⚠️ Deprecation note")
    .o-block
        |  As of spaCy 2.0, the #[code path] keyword argument is deprecated. spaCy
        |  will also raise an error if no model could be loaded and never just
        |  return an empty #[code Language] object. If you need a blank language,
        |  you need to import it explicitly (#[code from spacy.lang.en import English])
        |  or use #[+api("util#get_lang_class") #[code util.get_lang_class]].

    +code-new nlp = spacy.load('/model')
    +code-old nlp = spacy.load('en', path='/model')

+h(2, "info") spacy.info
    +tag function

p
    |  The same as the #[+api("cli#info") #[code info] command]. Pretty-print
    |  information about your installation, models and local setup from within
    |  spaCy. To get the model meta data as a dictionary instead, you can
    |  use the #[code meta] attribute on your #[code nlp] object with a
    |  loaded model, e.g. #[code nlp.meta].

+aside-code("Example").
    spacy.info()
    spacy.info('en')
    spacy.info('de', markdown=True)

+table(["Name", "Type", "Description"])
    +row
        +cell #[code model]
        +cell unicode
        +cell A model, i.e. shortcut link, package name or path (optional).

    +row
        +cell #[code markdown]
        +cell bool
        +cell Print information as Markdown.


+h(2, "explain") spacy.explain
    +tag function

p
    |  Get a description for a given POS tag, dependency label or entity type.
    |  For a list of available terms, see
    |  #[+src(gh("spacy", "spacy/glossary.py")) glossary.py].

+aside-code("Example").
    spacy.explain('NORP')
    # Nationalities or religious or political groups

    doc = nlp(u'Hello world')
    for word in doc:
        print(word.text, word.tag_, spacy.explain(word.tag_))
    # Hello UH interjection
    # world NN noun, singular or mass

+table(["Name", "Type", "Description"])
    +row
        +cell #[code term]
        +cell unicode
        +cell Term to explain.

    +footrow
        +cell returns
        +cell unicode
        +cell The explanation, or #[code None] if not found in the glossary.

+h(2, "set_factory") spacy.set_factory
    +tag function
    +tag-new(2)

p
    |  Set a factory that returns a custom
    |  #[+a("/docs/usage/language-processing-pipeline") processing pipeline]
    |  component. Factories are useful for creating stateful components,
    |  especially ones which depend on shared data.

+aside-code("Example").
    def my_factory(vocab):
        def my_component(doc):
            return doc
        return my_component

    spacy.set_factory('my_factory', my_factory)
    nlp = Language(pipeline=['my_factory'])

+table(["Name", "Type", "Description"])
    +row
        +cell #[code factory_id]
        +cell unicode
        +cell
            |  Unique name of factory. If added to a new pipeline, spaCy will
            |  look up the factory for this ID and use it to create the
            |  component.

    +row
        +cell #[code factory]
        +cell callable
        +cell
            |  Callable that takes a #[code Vocab] object and returns a pipeline
            |  component.