diff --git a/spacy/__main__.py b/spacy/__main__.py index 3333e90a9..fedf3166b 100644 --- a/spacy/__main__.py +++ b/spacy/__main__.py @@ -91,7 +91,6 @@ class CLI(object): parser_L1=0.0, no_tagger=False, no_parser=False, no_ner=False): """Train a model.""" - cli_train(lang, output_dir, train_data, dev_data, n_iter, not no_tagger, not no_parser, not no_ner, parser_L1) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 36f1d75a3..70e89fada 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -36,12 +36,13 @@ def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ne 'lang': language, 'features': lang.Defaults.tagger_features} gold_train = list(read_gold_json(train_path)) - gold_dev = list(read_gold_json(dev_path)) + gold_dev = list(read_gold_json(dev_path)) if dev_path else None train_model(lang, gold_train, gold_dev, output_path, tagger_cfg, parser_cfg, entity_cfg, n_iter) - scorer = evaluate(lang, list(read_gold_json(dev_path)), output_path) - print_results(scorer) + if gold_dev: + scorer = evaluate(lang, gold_dev, output_path) + print_results(scorer) def train_config(config): @@ -64,7 +65,7 @@ def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_ for itn, epoch in enumerate(trainer.epochs(n_iter, augment_data=None)): for doc, gold in epoch: trainer.update(doc, gold) - dev_scores = trainer.evaluate(dev_data) + dev_scores = trainer.evaluate(dev_data) if dev_data else [] print_progress(itn, trainer.nlp.parser.model.nr_weight, trainer.nlp.parser.model.nr_active_feat, **dev_scores.scores) @@ -92,8 +93,10 @@ def evaluate(Language, gold_tuples, output_path): def check_dirs(output_path, train_path, dev_path): if not output_path.exists(): util.sys_exit(output_path.as_posix(), title="Output directory not found") - if not train_path.exists() and train_path.is_file(): + if not train_path.exists() or not train_path.is_file(): util.sys_exit(train_path.as_posix(), title="Training data not found") + if dev_path and not 
dev_path.exists(): + util.sys_exit(dev_path.as_posix(), title="Development data not found") def print_progress(itn, nr_weight, nr_active_feat, **scores): diff --git a/website/_harp.json b/website/_harp.json index d26851804..82e82093e 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -28,7 +28,8 @@ "NAVIGATION": { "Home": "/", - "Docs": "/docs", + "Usage": "/docs/usage", + "API": "/docs/api", "Demos": "/docs/usage/showcase", "Blog": "https://explosion.ai/blog" }, @@ -62,7 +63,16 @@ "user": "spacy.us12", "id": "83b0498b1e7fa3c91ce68c3f1", "list": "89ad33e698" + }, + "BADGES": { + "pipy": { + "badge": "https://img.shields.io/pypi/v/spacy.svg?style=flat-square", + "link": "https://pypi.python.org/pypi/spacy" + }, + "conda": { + "badge": "https://anaconda.org/conda-forge/spacy/badges/version.svg", + "link": "https://anaconda.org/conda-forge/spacy" + } } - } } diff --git a/website/_includes/_mixins-base.jade b/website/_includes/_mixins-base.jade index ea4d2964d..80aa98a00 100644 --- a/website/_includes/_mixins-base.jade +++ b/website/_includes/_mixins-base.jade @@ -110,6 +110,17 @@ mixin gitter(button, label) !=button +//- Badge + name - [string] "pipy" or "conda" + +mixin badge(name) + - site = BADGES[name] + + if site + +a(site.link).u-padding-small + img(src=site.badge alt="{name} version" height="20") + + //- Logo mixin logo() diff --git a/website/_includes/_navigation.jade b/website/_includes/_navigation.jade index beb33be4b..d319ef2c9 100644 --- a/website/_includes/_navigation.jade +++ b/website/_includes/_navigation.jade @@ -6,11 +6,11 @@ nav.c-nav.u-text.js-nav(class=landing ? "c-nav--theme" : null) a(href='/') #[+logo] if SUBSECTION != "index" - .u-text-label.u-padding-small=SUBSECTION + .u-text-label.u-padding-small.u-hidden-xs=SUBSECTION ul.c-nav__menu each url, item in NAVIGATION - li.c-nav__menu__item + li.c-nav__menu__item(class=(url == "/") ? 
"u-hidden-xs" : null) +a(url)=item li.c-nav__menu__item diff --git a/website/assets/css/_base/_layout.sass b/website/assets/css/_base/_layout.sass index 3f680bdc2..8828651c6 100644 --- a/website/assets/css/_base/_layout.sass +++ b/website/assets/css/_base/_layout.sass @@ -34,3 +34,4 @@ main aside a ::selection background: $color-theme color: $color-back + text-shadow: none diff --git a/website/assets/css/_base/_objects.sass b/website/assets/css/_base/_objects.sass index abd5453f4..39a8c1e86 100644 --- a/website/assets/css/_base/_objects.sass +++ b/website/assets/css/_base/_objects.sass @@ -68,7 +68,7 @@ .o-box background: $color-theme-light padding: 2rem - border: 1px solid darken($color-theme-light, 5) + border-left: 4px solid $color-theme //- Icons diff --git a/website/assets/css/_components/_code.sass b/website/assets/css/_components/_code.sass index 9997f5e02..fd7c20e33 100644 --- a/website/assets/css/_components/_code.sass +++ b/website/assets/css/_components/_code.sass @@ -26,15 +26,19 @@ *:not(.c-code-block) > code font: normal 600 0.8em/#{1} $font-code - background: $color-subtle-light + background: rgba($color-front, 0.05) + box-shadow: 1px 1px 0 rgba($color-front, 0.1) + text-shadow: 1px 1px 0 rgba($color-back, 0.5) color: $color-front padding: 0.1em 0.5em margin: 0 border-radius: 1px + box-decoration-break: clone .c-aside__content & background: lighten($color-front, 10) color: $color-back + text-shadow: none //- Syntax Highlighting diff --git a/website/docs/index.jade b/website/docs/index.jade index c19602002..d5a8c6deb 100644 --- a/website/docs/index.jade +++ b/website/docs/index.jade @@ -5,7 +5,7 @@ include ../_includes/_mixins +aside("Help us improve the docs") | Did you spot a mistake or come across explanations that | are unclear? You can find a "Suggest edits" button at the - | bottom at each page that points you to the source. + | bottom of each page that points you to the source. 
| We always appreciate | #[+a(gh("spaCy") + "/pulls") pull requests].#[br]#[br] | Have you built something cool with spaCy, or did you diff --git a/website/docs/usage/_data.json b/website/docs/usage/_data.json index b0857762b..c219585f9 100644 --- a/website/docs/usage/_data.json +++ b/website/docs/usage/_data.json @@ -5,6 +5,7 @@ "Models": "models", "Lightning tour": "lightning-tour", "Command line": "cli", + "Troubleshooting": "troubleshooting", "Resources": "resources" }, "Workflows": { @@ -45,6 +46,11 @@ "cli": { "title": "Command Line Interface", + "next": "troubleshooting" + }, + + "troubleshooting": { + "title": "Troubleshooting", "next": "resources" }, diff --git a/website/docs/usage/cli.jade b/website/docs/usage/cli.jade index 66be83923..cc07c18ea 100644 --- a/website/docs/usage/cli.jade +++ b/website/docs/usage/cli.jade @@ -92,7 +92,7 @@ p +row +cell #[code model] +cell positional - +cell Shortcut link of model #[strong (optional)]. + +cell Shortcut link of model (optional). +row +cell #[code --markdown], #[code -md] diff --git a/website/docs/usage/index.jade b/website/docs/usage/index.jade index ee2e6f544..044f5fb37 100644 --- a/website/docs/usage/index.jade +++ b/website/docs/usage/index.jade @@ -23,6 +23,7 @@ p >>> nlp = spacy.load('en') +h(2, "pip") pip + +badge("pipy") p Using pip, spaCy releases are currently only available as source packages. @@ -39,6 +40,7 @@ p pip install spacy +h(2, "conda") conda + +badge("conda") p | Thanks to our great community, we've finally re-added conda support. You diff --git a/website/docs/usage/troubleshooting.jade b/website/docs/usage/troubleshooting.jade new file mode 100644 index 000000000..06454b055 --- /dev/null +++ b/website/docs/usage/troubleshooting.jade @@ -0,0 +1,163 @@ +//- 💫 DOCS > USAGE > TROUBLESHOOTING + +include ../../_includes/_mixins + +p + | This section collects some of the most common errors you may come + | across when installing, loading and using spaCy, as well as their solutions. 
+ ++aside("Help us improve this guide") + | Did you come across a problem like the ones listed here and want to + | share the solution? You can find the "Suggest edits" button at the + | bottom of this page that points you to the source. We always + | appreciate #[+a(gh("spaCy") + "/pulls") pull requests]! + ++h(2, "install-loading") Installation and loading + ++h(3, "compatible-model") No compatible model found + ++code(false, "text"). + No compatible model found for [lang] (spaCy v#{SPACY_VERSION}). + +p + | This usually means that the model you're trying to download does not + | exist, or isn't available for your version of spaCy. + ++infobox("Solutions") + | Check the #[+a(gh("spacy-models", "compatibility.json")) compatibility table] + | to see which models are available for your spaCy version. If you're using + | an old version, consider upgrading to the latest release. Note that while + | spaCy supports tokenization for + | #[+a("/docs/api/language-models/#alpha-support") a variety of languages], + | not all of them come with statistical models. To only use the tokenizer, + | import the language's #[code Language] class instead, for example + | #[code from spacy.fr import French]. + + ++h(3, "symlink-privilege") Symbolic link privilege not held + ++code(false, "text"). + OSError: symbolic link privilege not held + +p + | To create #[+a("/docs/usage/models/#usage") shortcut links] that let you + | load models by name, spaCy creates a symbolic link in the + | #[code spacy/data] directory. This means your user needs permission to do + | this. The above error mostly occurs when doing a system-wide installation, + | which will create the symlinks in a system directory. + ++infobox("Solutions") + | Run the #[code download] or #[code link] command as administrator, + | or use a #[code virtualenv] to install spaCy in a user directory, instead + | of doing a system-wide installation. + + ++h(3, "import-error") Import error + ++code(false, "text"). 
+ ImportError: No module named spacy + +p + | This error means that the spaCy module can't be located on your system, or in + | your environment. + ++infobox("Solutions") + | Make sure you have spaCy installed. If you're using a #[code virtualenv], + | make sure it's activated and check that spaCy is installed in that + | environment – otherwise, you're trying to load a system installation. You + | can also run #[code which python] to find out where your Python + | executable is located. + ++h(3, "import-error-models") Import error: models + ++code(false, "text"). + ImportError: No module named 'en_core_web_sm' + +p + | As of spaCy v1.7, all models can be installed as Python packages. This means + | that they'll become importable modules of your application. When creating + | #[+a("/docs/usage/models/#usage") shortcut links], spaCy will also try + | to import the model to load its meta data. If this fails, it's usually a + | sign that the package is not installed in the current environment. + ++infobox("Solutions") + | Run #[code pip list] or #[code pip freeze] to check which model packages + | you have installed, and install the + | #[+a("/docs/usage/models#available") correct models] if necessary. If you're + | importing a model manually at the top of a file, make sure to use the name + | of the package, not the shortcut link you've created. + ++h(3, "vocab-strings") File not found: vocab/strings.json + ++code(false, "text"). + FileNotFoundError: No such file or directory: [...]/vocab/strings.json + +p + | This error may occur when using #[code spacy.load()] to load + | a language model – either because you haven't set up a + | #[+a("/docs/usage/models/#usage") shortcut link] for it, or because it + | doesn't actually exist. + ++infobox("Solutions") + | Set up a #[+a("/docs/usage/models/#usage") shortcut link] for the model + | you want to load. This can either be an installed model package, or a + | local directory containing the model data. 
If you want to use one of the + | #[+a("/docs/api/language-models/#alpha-support") alpha tokenizers] for + | languages that don't yet have a statistical model, you should import its + | #[code Language] class instead, for example + | #[code from spacy.fr import French]. + ++h(3, "command-not-found") Command not found + ++code(false, "text"). + command not found: spacy + +p + | This error may occur when running the #[code spacy] command from the + | command line. spaCy does not currently add an entry to your #[code PATH] + | environment variable, as this can lead to unexpected results, especially + | when using #[code virtualenv]. Instead, commands need to be prefixed with + | #[code python -m]. + ++infobox("Solution") + | Run the command with #[code python -m], for example + | #[code python -m spacy download en]. For more info on this, see the + | #[+a("/docs/usage/cli") CLI documentation]. + ++h(2, "usage") Using spaCy + ++h(3, "pos-lemma-number") POS tag or lemma is returned as number + ++code. + doc = nlp(u'This is text.') + print([word.pos for word in doc]) + # [88, 98, 90, 95] + +p + | Like many NLP libraries, spaCy encodes all strings to integers. This + | reduces memory usage and improves efficiency. The integer mapping also + | makes it easy to interoperate with numpy. To access the string + | representation instead of the integer ID, add an underscore #[code _] + | after the attribute. + ++infobox("Solutions") + | Use #[code pos_] or #[code lemma_] instead. See the + | #[+api("token#attributes") #[code Token] attributes] for a list of available + | attributes and their string representations. + + ++h(3, "pron-lemma") Pronoun lemma is returned as #[code -PRON-] + ++code. + doc = nlp(u'They are') + print(doc[0].lemma_) + # -PRON- + +p + | This is in fact expected behaviour and not a bug. + | Unlike verbs and common nouns, there's no clear base form of a personal + | pronoun. 
Should the lemma of "me" be "I", or should we normalize person + | as well, giving "it" — or maybe "he"? spaCy's solution is to introduce a + | novel symbol, #[code -PRON-], which is used as the lemma for + | all personal pronouns. For more info on this, see the + | #[+api("annotation#lemmatization") annotation specs] on lemmatization.