mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-06 12:53:19 +03:00
Merge CLI changes
This commit is contained in:
commit
9dcb58aaaf
|
@ -91,7 +91,6 @@ class CLI(object):
|
||||||
parser_L1=0.0,
|
parser_L1=0.0,
|
||||||
no_tagger=False, no_parser=False, no_ner=False):
|
no_tagger=False, no_parser=False, no_ner=False):
|
||||||
"""Train a model."""
|
"""Train a model."""
|
||||||
|
|
||||||
cli_train(lang, output_dir, train_data, dev_data, n_iter,
|
cli_train(lang, output_dir, train_data, dev_data, n_iter,
|
||||||
not no_tagger, not no_parser, not no_ner,
|
not no_tagger, not no_parser, not no_ner,
|
||||||
parser_L1)
|
parser_L1)
|
||||||
|
|
|
@ -36,12 +36,13 @@ def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ne
|
||||||
'lang': language,
|
'lang': language,
|
||||||
'features': lang.Defaults.tagger_features}
|
'features': lang.Defaults.tagger_features}
|
||||||
gold_train = list(read_gold_json(train_path))
|
gold_train = list(read_gold_json(train_path))
|
||||||
gold_dev = list(read_gold_json(dev_path))
|
gold_dev = list(read_gold_json(dev_path)) if dev_path else None
|
||||||
|
|
||||||
train_model(lang, gold_train, gold_dev, output_path, tagger_cfg, parser_cfg,
|
train_model(lang, gold_train, gold_dev, output_path, tagger_cfg, parser_cfg,
|
||||||
entity_cfg, n_iter)
|
entity_cfg, n_iter)
|
||||||
scorer = evaluate(lang, list(read_gold_json(dev_path)), output_path)
|
if gold_dev:
|
||||||
print_results(scorer)
|
scorer = evaluate(lang, gold_dev, output_path)
|
||||||
|
print_results(scorer)
|
||||||
|
|
||||||
|
|
||||||
def train_config(config):
|
def train_config(config):
|
||||||
|
@ -64,7 +65,7 @@ def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_
|
||||||
for itn, epoch in enumerate(trainer.epochs(n_iter, augment_data=None)):
|
for itn, epoch in enumerate(trainer.epochs(n_iter, augment_data=None)):
|
||||||
for doc, gold in epoch:
|
for doc, gold in epoch:
|
||||||
trainer.update(doc, gold)
|
trainer.update(doc, gold)
|
||||||
dev_scores = trainer.evaluate(dev_data)
|
dev_scores = trainer.evaluate(dev_data) if dev_data else []
|
||||||
print_progress(itn, trainer.nlp.parser.model.nr_weight,
|
print_progress(itn, trainer.nlp.parser.model.nr_weight,
|
||||||
trainer.nlp.parser.model.nr_active_feat,
|
trainer.nlp.parser.model.nr_active_feat,
|
||||||
**dev_scores.scores)
|
**dev_scores.scores)
|
||||||
|
@ -92,8 +93,10 @@ def evaluate(Language, gold_tuples, output_path):
|
||||||
def check_dirs(output_path, train_path, dev_path):
|
def check_dirs(output_path, train_path, dev_path):
|
||||||
if not output_path.exists():
|
if not output_path.exists():
|
||||||
util.sys_exit(output_path.as_posix(), title="Output directory not found")
|
util.sys_exit(output_path.as_posix(), title="Output directory not found")
|
||||||
if not train_path.exists() and train_path.is_file():
|
if not train_path.exists() or not train_path.is_file():
|
||||||
util.sys_exit(train_path.as_posix(), title="Training data not found")
|
util.sys_exit(train_path.as_posix(), title="Training data not found")
|
||||||
|
if dev_path and not dev_path.exists():
|
||||||
|
util.sys_exit(dev_path.as_posix(), title="Development data not found")
|
||||||
|
|
||||||
|
|
||||||
def print_progress(itn, nr_weight, nr_active_feat, **scores):
|
def print_progress(itn, nr_weight, nr_active_feat, **scores):
|
||||||
|
|
|
@ -28,7 +28,8 @@
|
||||||
|
|
||||||
"NAVIGATION": {
|
"NAVIGATION": {
|
||||||
"Home": "/",
|
"Home": "/",
|
||||||
"Docs": "/docs",
|
"Usage": "/docs/usage",
|
||||||
|
"API": "/docs/api",
|
||||||
"Demos": "/docs/usage/showcase",
|
"Demos": "/docs/usage/showcase",
|
||||||
"Blog": "https://explosion.ai/blog"
|
"Blog": "https://explosion.ai/blog"
|
||||||
},
|
},
|
||||||
|
@ -62,7 +63,16 @@
|
||||||
"user": "spacy.us12",
|
"user": "spacy.us12",
|
||||||
"id": "83b0498b1e7fa3c91ce68c3f1",
|
"id": "83b0498b1e7fa3c91ce68c3f1",
|
||||||
"list": "89ad33e698"
|
"list": "89ad33e698"
|
||||||
|
},
|
||||||
|
"BADGES": {
|
||||||
|
"pipy": {
|
||||||
|
"badge": "https://img.shields.io/pypi/v/spacy.svg?style=flat-square",
|
||||||
|
"link": "https://pypi.python.org/pypi/spacy"
|
||||||
|
},
|
||||||
|
"conda": {
|
||||||
|
"badge": "https://anaconda.org/conda-forge/spacy/badges/version.svg",
|
||||||
|
"link": "https://anaconda.org/conda-forge/spacy"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -110,6 +110,17 @@ mixin gitter(button, label)
|
||||||
!=button
|
!=button
|
||||||
|
|
||||||
|
|
||||||
|
//- Badge
|
||||||
|
name - [string] "pipy" or "conda"
|
||||||
|
|
||||||
|
mixin badge(name)
|
||||||
|
- site = BADGES[name]
|
||||||
|
|
||||||
|
if site
|
||||||
|
+a(site.link).u-padding-small
|
||||||
|
img(src=site.badge alt="{name} version" height="20")
|
||||||
|
|
||||||
|
|
||||||
//- Logo
|
//- Logo
|
||||||
|
|
||||||
mixin logo()
|
mixin logo()
|
||||||
|
|
|
@ -6,11 +6,11 @@ nav.c-nav.u-text.js-nav(class=landing ? "c-nav--theme" : null)
|
||||||
a(href='/') #[+logo]
|
a(href='/') #[+logo]
|
||||||
|
|
||||||
if SUBSECTION != "index"
|
if SUBSECTION != "index"
|
||||||
.u-text-label.u-padding-small=SUBSECTION
|
.u-text-label.u-padding-small.u-hidden-xs=SUBSECTION
|
||||||
|
|
||||||
ul.c-nav__menu
|
ul.c-nav__menu
|
||||||
each url, item in NAVIGATION
|
each url, item in NAVIGATION
|
||||||
li.c-nav__menu__item
|
li.c-nav__menu__item(class=(url == "/") ? "u-hidden-xs" : null)
|
||||||
+a(url)=item
|
+a(url)=item
|
||||||
|
|
||||||
li.c-nav__menu__item
|
li.c-nav__menu__item
|
||||||
|
|
|
@ -34,3 +34,4 @@ main aside a
|
||||||
::selection
|
::selection
|
||||||
background: $color-theme
|
background: $color-theme
|
||||||
color: $color-back
|
color: $color-back
|
||||||
|
text-shadow: none
|
||||||
|
|
|
@ -68,7 +68,7 @@
|
||||||
.o-box
|
.o-box
|
||||||
background: $color-theme-light
|
background: $color-theme-light
|
||||||
padding: 2rem
|
padding: 2rem
|
||||||
border: 1px solid darken($color-theme-light, 5)
|
border-left: 4px solid $color-theme
|
||||||
|
|
||||||
|
|
||||||
//- Icons
|
//- Icons
|
||||||
|
|
|
@ -26,15 +26,19 @@
|
||||||
|
|
||||||
*:not(.c-code-block) > code
|
*:not(.c-code-block) > code
|
||||||
font: normal 600 0.8em/#{1} $font-code
|
font: normal 600 0.8em/#{1} $font-code
|
||||||
background: $color-subtle-light
|
background: rgba($color-front, 0.05)
|
||||||
|
box-shadow: 1px 1px 0 rgba($color-front, 0.1)
|
||||||
|
text-shadow: 1px 1px 0 rgba($color-back, 0.5)
|
||||||
color: $color-front
|
color: $color-front
|
||||||
padding: 0.1em 0.5em
|
padding: 0.1em 0.5em
|
||||||
margin: 0
|
margin: 0
|
||||||
border-radius: 1px
|
border-radius: 1px
|
||||||
|
box-decoration-break: clone
|
||||||
|
|
||||||
.c-aside__content &
|
.c-aside__content &
|
||||||
background: lighten($color-front, 10)
|
background: lighten($color-front, 10)
|
||||||
color: $color-back
|
color: $color-back
|
||||||
|
text-shadow: none
|
||||||
|
|
||||||
|
|
||||||
//- Syntax Highlighting
|
//- Syntax Highlighting
|
||||||
|
|
|
@ -5,7 +5,7 @@ include ../_includes/_mixins
|
||||||
+aside("Help us improve the docs")
|
+aside("Help us improve the docs")
|
||||||
| Did you spot a mistake or come across explanations that
|
| Did you spot a mistake or come across explanations that
|
||||||
| are unclear? You can find a "Suggest edits" button at the
|
| are unclear? You can find a "Suggest edits" button at the
|
||||||
| bottom at each page that points you to the source.
|
| bottom of each page that points you to the source.
|
||||||
| We always appreciate
|
| We always appreciate
|
||||||
| #[+a(gh("spaCy") + "/pulls") pull requests].#[br]#[br]
|
| #[+a(gh("spaCy") + "/pulls") pull requests].#[br]#[br]
|
||||||
| Have you built something cool with spaCy, or did you
|
| Have you built something cool with spaCy, or did you
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
"Models": "models",
|
"Models": "models",
|
||||||
"Lightning tour": "lightning-tour",
|
"Lightning tour": "lightning-tour",
|
||||||
"Command line": "cli",
|
"Command line": "cli",
|
||||||
|
"Troubleshooting": "troubleshooting",
|
||||||
"Resources": "resources"
|
"Resources": "resources"
|
||||||
},
|
},
|
||||||
"Workflows": {
|
"Workflows": {
|
||||||
|
@ -45,6 +46,11 @@
|
||||||
|
|
||||||
"cli": {
|
"cli": {
|
||||||
"title": "Command Line Interface",
|
"title": "Command Line Interface",
|
||||||
|
"next": "troubleshooting"
|
||||||
|
},
|
||||||
|
|
||||||
|
"troubleshooting": {
|
||||||
|
"title": "Troubleshooting",
|
||||||
"next": "resources"
|
"next": "resources"
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
|
@ -92,7 +92,7 @@ p
|
||||||
+row
|
+row
|
||||||
+cell #[code model]
|
+cell #[code model]
|
||||||
+cell positional
|
+cell positional
|
||||||
+cell Shortcut link of model #[strong (optional)].
|
+cell Shortcut link of model (optional).
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code --markdown], #[code -md]
|
+cell #[code --markdown], #[code -md]
|
||||||
|
|
|
@ -23,6 +23,7 @@ p
|
||||||
>>> nlp = spacy.load('en')
|
>>> nlp = spacy.load('en')
|
||||||
|
|
||||||
+h(2, "pip") pip
|
+h(2, "pip") pip
|
||||||
|
+badge("pipy")
|
||||||
|
|
||||||
p Using pip, spaCy releases are currently only available as source packages.
|
p Using pip, spaCy releases are currently only available as source packages.
|
||||||
|
|
||||||
|
@ -39,6 +40,7 @@ p
|
||||||
pip install spacy
|
pip install spacy
|
||||||
|
|
||||||
+h(2, "conda") conda
|
+h(2, "conda") conda
|
||||||
|
+badge("conda")
|
||||||
|
|
||||||
p
|
p
|
||||||
| Thanks to our great community, we've finally re-added conda support. You
|
| Thanks to our great community, we've finally re-added conda support. You
|
||||||
|
|
163
website/docs/usage/troubleshooting.jade
Normal file
163
website/docs/usage/troubleshooting.jade
Normal file
|
@ -0,0 +1,163 @@
|
||||||
|
//- 💫 DOCS > USAGE > TROUBLESHOOTING
|
||||||
|
|
||||||
|
include ../../_includes/_mixins
|
||||||
|
|
||||||
|
p
|
||||||
|
| This section collects some of the most common errors you may come
|
||||||
|
| across when installing, loading and using spaCy, as well as their solutions.
|
||||||
|
|
||||||
|
+aside("Help us improve this guide")
|
||||||
|
| Did you come across a problem like the ones listed here and want to
|
||||||
|
| share the solution? You can find the "Suggest edits" button at the
|
||||||
|
| bottom of this page that points you to the source. We always
|
||||||
|
| appreciate #[+a(gh("spaCy") + "/pulls") pull requests]!
|
||||||
|
|
||||||
|
+h(2, "install-loading") Installation and loading
|
||||||
|
|
||||||
|
+h(3, "compatible-model") No compatible model found
|
||||||
|
|
||||||
|
+code(false, "text").
|
||||||
|
No compatible model found for [lang] (spaCy v#{SPACY_VERSION}).
|
||||||
|
|
||||||
|
p
|
||||||
|
| This usually means that the model you're trying to download does not
|
||||||
|
| exist, or isn't available for your version of spaCy.
|
||||||
|
|
||||||
|
+infobox("Solutions")
|
||||||
|
| Check the #[+a(gh("spacy-models", "compatibility.json")) compatibility table]
|
||||||
|
| to see which models are available for your spaCy version. If you're using
|
||||||
|
| an old version, consider upgrading to the latest release. Note that while
|
||||||
|
| spaCy supports tokenization for
|
||||||
|
| #[+a("/docs/api/language-models/#alpha-support") a variety of languages],
|
||||||
|
| not all of them come with statistical models. To only use the tokenizer,
|
||||||
|
| import the language's #[code Language] class instead, for example
|
||||||
|
| #[code from spacy.fr import French].
|
||||||
|
|
||||||
|
|
||||||
|
+h(3, "symlink-privilege") Symbolic link privilege not held
|
||||||
|
|
||||||
|
+code(false, "text").
|
||||||
|
OSError: symbolic link privilege not held
|
||||||
|
|
||||||
|
p
|
||||||
|
| To create #[+a("/docs/usage/models/#usage") shortcut links] that let you
|
||||||
|
| load models by name, spaCy creates a symbolic link in the
|
||||||
|
| #[code spacy/data] directory. This means your user needs permission to do
|
||||||
|
| this. The above error mostly occurs when doing a system-wide installation,
|
||||||
|
| which will create the symlinks in a system directory.
|
||||||
|
|
||||||
|
+infobox("Solutions")
|
||||||
|
| Run the #[code download] or #[code link] command as administrator,
|
||||||
|
| or use a #[code virtualenv] to install spaCy in a user directory, instead
|
||||||
|
| of doing a system-wide installation.
|
||||||
|
|
||||||
|
|
||||||
|
+h(3, "import-error") Import error
|
||||||
|
|
||||||
|
+code(false, "text").
|
||||||
|
ImportError: No module named spacy
|
||||||
|
|
||||||
|
p
|
||||||
|
| This error means that the spaCy module can't be located on your system, or in
|
||||||
|
| your environment.
|
||||||
|
|
||||||
|
+infobox("Solutions")
|
||||||
|
| Make sure you have spaCy installed. If you're using a #[code virtualenv],
|
||||||
|
| make sure it's activated and check that spaCy is installed in that
|
||||||
|
| environment – otherwise, you're trying to load a system installation. You
|
||||||
|
| can also run #[code which python] to find out where your Python
|
||||||
|
| executable is located.
|
||||||
|
|
||||||
|
+h(3, "import-error-models") Import error: models
|
||||||
|
|
||||||
|
+code(false, "text").
|
||||||
|
ImportError: No module named 'en_core_web_sm'
|
||||||
|
|
||||||
|
p
|
||||||
|
| As of spaCy v1.7, all models can be installed as Python packages. This means
|
||||||
|
| that they'll become importable modules of your application. When creating
|
||||||
|
| #[+a("/docs/usage/models/#usage") shortcut links], spaCy will also try
|
||||||
|
| to import the model to load its metadata. If this fails, it's usually a
|
||||||
|
| sign that the package is not installed in the current environment.
|
||||||
|
|
||||||
|
+infobox("Solutions")
|
||||||
|
| Run #[code pip list] or #[code pip freeze] to check which model packages
|
||||||
|
| you have installed, and install the
|
||||||
|
| #[+a("/docs/usage/models#available") correct models] if necessary. If you're
|
||||||
|
| importing a model manually at the top of a file, make sure to use the name
|
||||||
|
| of the package, not the shortcut link you've created.
|
||||||
|
|
||||||
|
+h(3, "vocab-strings") File not found: vocab/strings.json
|
||||||
|
|
||||||
|
+code(false, "text").
|
||||||
|
FileNotFoundError: No such file or directory: [...]/vocab/strings.json
|
||||||
|
|
||||||
|
p
|
||||||
|
| This error may occur when using #[code spacy.load()] to load
|
||||||
|
| a language model – either because you haven't set up a
|
||||||
|
| #[+a("/docs/usage/models/#usage") shortcut link] for it, or because it
|
||||||
|
| doesn't actually exist.
|
||||||
|
|
||||||
|
+infobox("Solutions")
|
||||||
|
| Set up a #[+a("/docs/usage/models/#usage") shortcut link] for the model
|
||||||
|
| you want to load. This can either be an installed model package, or a
|
||||||
|
| local directory containing the model data. If you want to use one of the
|
||||||
|
| #[+a("/docs/api/language-models/#alpha-support") alpha tokenizers] for
|
||||||
|
| languages that don't yet have a statistical model, you should import its
|
||||||
|
| #[code Language] class instead, for example
|
||||||
|
| #[code from spacy.fr import French].
|
||||||
|
|
||||||
|
+h(3, "command-not-found") Command not found
|
||||||
|
|
||||||
|
+code(false, "text").
|
||||||
|
command not found: spacy
|
||||||
|
|
||||||
|
p
|
||||||
|
| This error may occur when running the #[code spacy] command from the
|
||||||
|
| command line. spaCy does not currently add an entry to your #[code PATH]
|
||||||
|
| environment variable, as this can lead to unexpected results, especially
|
||||||
|
| when using #[code virtualenv]. Instead, commands need to be prefixed with
|
||||||
|
| #[code python -m].
|
||||||
|
|
||||||
|
+infobox("Solution")
|
||||||
|
| Run the command with #[code python -m], for example
|
||||||
|
| #[code python -m spacy download en]. For more info on this, see the
|
||||||
|
| #[+a("/docs/usage/cli") CLI documentation].
|
||||||
|
|
||||||
|
+h(2, "usage") Using spaCy
|
||||||
|
|
||||||
|
+h(3, "pos-lemma-number") POS tag or lemma is returned as number
|
||||||
|
|
||||||
|
+code.
|
||||||
|
doc = nlp(u'This is text.')
|
||||||
|
print([word.pos for word in doc])
|
||||||
|
# [88, 98, 90, 95]
|
||||||
|
|
||||||
|
p
|
||||||
|
| Like many NLP libraries, spaCy encodes all strings to integers. This
|
||||||
|
| reduces memory usage and improves efficiency. The integer mapping also
|
||||||
|
| makes it easy to interoperate with numpy. To access the string
|
||||||
|
| representation instead of the integer ID, add an underscore #[code _]
|
||||||
|
| after the attribute.
|
||||||
|
|
||||||
|
+infobox("Solutions")
|
||||||
|
| Use #[code pos_] or #[code lemma_] instead. See the
|
||||||
|
| #[+api("token#attributes") #[code Token] attributes] for a list of available
|
||||||
|
| attributes and their string representations.
|
||||||
|
|
||||||
|
|
||||||
|
+h(3, "pron-lemma") Pronoun lemma is returned as #[code -PRON-]
|
||||||
|
|
||||||
|
+code.
|
||||||
|
doc = nlp(u'They are')
|
||||||
|
print(doc[0].lemma_)
|
||||||
|
# -PRON-
|
||||||
|
|
||||||
|
p
|
||||||
|
| This is in fact expected behaviour and not a bug.
|
||||||
|
| Unlike verbs and common nouns, there's no clear base form of a personal
|
||||||
|
| pronoun. Should the lemma of "me" be "I", or should we normalize person
|
||||||
|
| as well, giving "it" — or maybe "he"? spaCy's solution is to introduce a
|
||||||
|
| novel symbol, #[code -PRON-], which is used as the lemma for
|
||||||
|
| all personal pronouns. For more info on this, see the
|
||||||
|
| #[+api("annotation#lemmatization") annotation specs] on lemmatization.
|
Loading…
Reference in New Issue
Block a user