Update usage workflows

This commit is contained in:
ines 2017-05-24 11:59:08 +02:00
parent 66088851dc
commit 8b86b08bed
8 changed files with 55 additions and 81 deletions

View File

@ -225,7 +225,7 @@ p
p p
| Print a formatted, text-wrapped message with optional title. If a text | Print a formatted, text-wrapped message with optional title. If a text
| argument is a #[code Path], it's converted to a string. Should only | argument is a #[code Path], it's converted to a string. Should only
| be used for interactive components like the #[+a("/docs/api/cli") CLI]. | be used for interactive components like the #[+api("cli") cli].
+aside-code("Example"). +aside-code("Example").
data_path = Path('/some/path') data_path = Path('/some/path')

View File

@ -125,7 +125,7 @@
}, },
"saving-loading": { "saving-loading": {
"title": "Saving and loading models" "title": "Saving, loading and data serialization"
}, },
"showcase": { "showcase": {

View File

@ -538,8 +538,8 @@ p
| #[+src(gh("spacy-dev-resources", "training/word_freqs.py")) word_freqs.py] | #[+src(gh("spacy-dev-resources", "training/word_freqs.py")) word_freqs.py]
| script from the spaCy developer resources. Note that your corpus should | script from the spaCy developer resources. Note that your corpus should
| not be preprocessed (i.e. you need punctuation for example). The | not be preprocessed (i.e. you need punctuation for example). The
| #[+a("/docs/api/cli#model") #[code model]] command expects a | #[+api("cli#model") #[code model]] command expects a tab-separated word
| tab-separated word frequencies file with three columns: | frequencies file with three columns:
+list("numbers") +list("numbers")
+item The number of times the word occurred in your language sample. +item The number of times the word occurred in your language sample.
@ -654,13 +654,12 @@ p
| If your corpus uses the | If your corpus uses the
| #[+a("http://universaldependencies.org/docs/format.html") CoNLL-U] format, | #[+a("http://universaldependencies.org/docs/format.html") CoNLL-U] format,
| i.e. files with the extension #[code .conllu], you can use the | i.e. files with the extension #[code .conllu], you can use the
| #[+a("/docs/api/cli#convert") #[code convert]] command to convert it to | #[+api("cli#convert") #[code convert]] command to convert it to spaCy's
| spaCy's #[+a("/docs/api/annotation#json-input") JSON format] for training. | #[+a("/docs/api/annotation#json-input") JSON format] for training.
p p
| Once you have your UD corpus transformed into JSON, you can train your | Once you have your UD corpus transformed into JSON, you can train your
| model using spaCy's | model using spaCy's #[+api("cli#train") #[code train]] command:
| #[+a("/docs/api/cli#train") #[code train]] command:
+code(false, "bash"). +code(false, "bash").
python -m spacy train [lang] [output_dir] [train_data] [dev_data] [--n_iter] [--parser_L1] [--no_tagger] [--no_parser] [--no_ner] python -m spacy train [lang] [output_dir] [train_data] [dev_data] [--n_iter] [--parser_L1] [--no_tagger] [--no_parser] [--no_ner]

View File

@ -1,38 +0,0 @@
//- 💫 DOCS > USAGE > CUSTOMIZING THE PIPELINE
include ../../_includes/_mixins
p
| spaCy provides several linguistic annotation functions by default. Each
| function takes a Doc object, and modifies it in-place. The default
| pipeline is #[code [nlp.tagger, nlp.entity, nlp.parser]]. spaCy 1.0
| introduced the ability to customise this pipeline with arbitrary
| functions.
+code.
def arbitrary_fixup_rules(doc):
for token in doc:
if token.text == u'bill' and token.tag_ == u'NNP':
token.tag_ = u'NN'
def custom_pipeline(nlp):
return (nlp.tagger, arbitrary_fixup_rules, nlp.parser, nlp.entity)
nlp = spacy.load('en', create_pipeline=custom_pipeline)
p
| The easiest way to customise the pipeline is to pass a
| #[code create_pipeline] callback to the #[code spacy.load()] function.
p
| The callback you pass to #[code create_pipeline] should take a single
| argument, and return a sequence of callables. Each callable in the
| sequence should accept a #[code Doc] object and modify it in place.
p
| Instead of passing a callback, you can also write to the
| #[code .pipeline] attribute directly.
+code.
nlp = spacy.load('en')
nlp.pipeline = [nlp.tagger]

View File

@ -291,7 +291,7 @@ p
| environment variable, as this can lead to unexpected results, especially | environment variable, as this can lead to unexpected results, especially
| when using #[code virtualenv]. Run the command with #[code python -m], | when using #[code virtualenv]. Run the command with #[code python -m],
| for example #[code python -m spacy download en]. For more info on this, | for example #[code python -m spacy download en]. For more info on this,
| see the #[+a("/docs/api/cli#download") CLI documentation]. | see #[+api("cli#download") download].
+h(3, "module-load") 'module' object has no attribute 'load' +h(3, "module-load") 'module' object has no attribute 'load'

View File

@ -10,14 +10,19 @@ p
doc = nlp(u'Hello, world! A three sentence document.\nWith new lines...') doc = nlp(u'Hello, world! A three sentence document.\nWith new lines...')
p p
| The library should perform equally well with short or long documents. | The library should perform equally well with #[strong short or long documents].
| All algorithms are linear-time in the length of the string, and once the | All algorithms are linear-time in the length of the string, and once the
| data is loaded, there's no significant start-up cost to consider. This | data is loaded, there's no significant start-up cost to consider. This
| means that you don't have to strategically merge or split your text — | means that you don't have to strategically merge or split your text —
| you should feel free to feed in either single tweets or whole novels. | you should feel free to feed in either single tweets or whole novels.
p p
| If you run #[code nlp = spacy.load('en')], the #[code nlp] object will | If you run #[+api("spacy#load") #[code spacy.load('en')]], spaCy will
| load the #[+a("/docs/usage/models") model] associated with the name
| #[code 'en']. Each model is a Python package containing an
| #[+src(gh("spacy-dev-resources", "templates/model/en_model_name/__init__.py")) __init__.py].
| The #[code nlp] object will
| be an instance of #[code spacy.en.English]. This means that when you run | be an instance of #[code spacy.en.English]. This means that when you run
| #[code doc = nlp(text)], you're executing | #[code doc = nlp(text)], you're executing
| #[code spacy.en.English.__call__], which is implemented on its parent | #[code spacy.en.English.__call__], which is implemented on its parent

View File

@ -1,5 +1,8 @@
include ../../_includes/_mixins include ../../_includes/_mixins
+h(2, "models") Saving models
p p
| After training your model, you'll usually want to save its state, and load | After training your model, you'll usually want to save its state, and load
| it back later. You can do this with the | it back later. You can do this with the
@ -14,28 +17,28 @@ p
| will be written out. To make the model more convenient to deploy, we | will be written out. To make the model more convenient to deploy, we
| recommend wrapping it as a Python package. | recommend wrapping it as a Python package.
+h(2, "generating") Generating a model package +h(3, "models-generating") Generating a model package
+infobox("Important note") +infobox("Important note")
| The model packages are #[strong not suitable] for the public | The model packages are #[strong not suitable] for the public
| #[+a("https://pypi.python.org") pypi.python.org] directory, which is not | #[+a("https://pypi.python.org") pypi.python.org] directory, which is not
| designed for binary data and files over 50 MB. However, if your company | designed for binary data and files over 50 MB. However, if your company
| is running an internal installation of pypi, publishing your models on | is running an #[strong internal installation] of PyPI, publishing your
| there can be a convenient solution to share them with your team. | models on there can be a convenient way to share them with your team.
p p
| spaCy comes with a handy CLI command that will create all required files, | spaCy comes with a handy CLI command that will create all required files,
| and walk you through generating the meta data. You can also create the | and walk you through generating the meta data. You can also create the
| meta.json manually and place it in the model data directory, or supply a | meta.json manually and place it in the model data directory, or supply a
| path to it using the #[code --meta] flag. For more info on this, see the | path to it using the #[code --meta] flag. For more info on this, see
| #[+a("/docs/api/cli#package") #[code package]] command documentation. | the #[+api("cli#package") #[code package]] docs.
+aside-code("meta.json", "json"). +aside-code("meta.json", "json").
{ {
"name": "example_model", "name": "example_model",
"lang": "en", "lang": "en",
"version": "1.0.0", "version": "1.0.0",
"spacy_version": ">=1.7.0,<2.0.0", "spacy_version": ">=2.0.0,<3.0.0",
"description": "Example model for spaCy", "description": "Example model for spaCy",
"author": "You", "author": "You",
"email": "you@example.com", "email": "you@example.com",
@ -58,7 +61,7 @@ p This command will create a model package directory that should look like this:
p p
| You can also find templates for all files in our | You can also find templates for all files in our
| #[+a(gh("spacy-dev-resources", "templates/model")) spaCy dev resources]. | #[+src(gh("spacy-dev-resources", "templates/model")) spaCy dev resources].
| If you're creating the package manually, keep in mind that the directories | If you're creating the package manually, keep in mind that the directories
| need to be named according to the naming conventions of | need to be named according to the naming conventions of
| #[code [language]_[name]] and #[code [language]_[name]-[version]]. The | #[code [language]_[name]] and #[code [language]_[name]-[version]]. The
@ -66,44 +69,49 @@ p
| respective #[code Language] class in spaCy, which will later be returned | respective #[code Language] class in spaCy, which will later be returned
| by the model's #[code load()] method. | by the model's #[code load()] method.
+h(2, "building") Building a model package
p p
| To build the package, run the following command from within the | To #[strong build the package], run the following command from within the
| directory. This will create a #[code .tar.gz] archive in a directory | directory. This will create a #[code .tar.gz] archive in a directory
| #[code /dist]. | #[code /dist]. For more information on building Python packages, see the
| #[+a("https://setuptools.readthedocs.io/en/latest/") Python Setuptools documentation].
+code(false, "bash"). +code(false, "bash").
python setup.py sdist python setup.py sdist
p +h(2, "loading") Loading a custom model package
| For more information on building Python packages, see the
| #[+a("https://setuptools.readthedocs.io/en/latest/") Python Setuptools documentation].
+h(2, "loading") Loading a model package
p p
| Model packages can be installed by pointing pip to the model's | To load a model from a data directory, you can use
| #[code .tar.gz] archive: | #[+api("spacy#load") #[code spacy.load()]] with the local path:
+code.
nlp = spacy.load('/path/to/model')
p
| If you have generated a model package, you can also install it by
| pointing pip to the model's #[code .tar.gz] archive — this is pretty
| much exactly what spaCy's #[+api("cli#download") #[code download]]
| command does under the hood.
+code(false, "bash"). +code(false, "bash").
pip install /path/to/en_example_model-1.0.0.tar.gz pip install /path/to/en_example_model-1.0.0.tar.gz
p You'll then be able to load the model as follows: +aside-code("Custom model names", "bash").
# optional: assign custom name to model
python -m spacy link en_example_model my_cool_model
p
| You'll then be able to load the model via spaCy's loader, or by importing
| it as a module. For larger code bases, we usually recommend native
| imports, as this will make it easier to integrate models with your
| existing build process, continuous integration workflow and testing
| framework.
+code. +code.
# option 1: import model as module
import en_example_model import en_example_model
nlp = en_example_model.load() nlp = en_example_model.load()
p # option 2: use spacy.load()
| To load the model via #[code spacy.load()], you can also nlp = spacy.load('en_example_model')
| create a #[+a("/docs/usage/models#usage") shortcut link] that maps the
| package name to a custom model name of your choice:
+code(false, "bash").
python -m spacy link en_example_model example
+code.
import spacy
nlp = spacy.load('example')

View File

@ -77,7 +77,7 @@ p
p p
| To make the model more convenient to deploy, we recommend wrapping it as | To make the model more convenient to deploy, we recommend wrapping it as
| a Python package, so that you can install it via pip and load it as a | a Python package, so that you can install it via pip and load it as a
| module. spaCy comes with a handy #[+a("/docs/api/cli#package") #[code package]] | module. spaCy comes with a handy #[+api("cli#package") #[code package]]
| CLI command to create all required files and directories. | CLI command to create all required files and directories.
+code(false, "bash"). +code(false, "bash").