From 323b418cf1f01241f0112c11fc7e3a7f0c87eb4a Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 26 Mar 2017 11:13:52 +0200 Subject: [PATCH 01/12] Split docs menu item into Usage and API --- website/_harp.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/website/_harp.json b/website/_harp.json index d26851804..2cbfe538b 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -28,7 +28,8 @@ "NAVIGATION": { "Home": "/", - "Docs": "/docs", + "Usage": "/docs/usage", + "API": "/docs/api", "Demos": "/docs/usage/showcase", "Blog": "https://explosion.ai/blog" }, From 8389e0549672511531faffcf69f916f683bd5b99 Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 26 Mar 2017 11:14:08 +0200 Subject: [PATCH 02/12] Hide home link and subsection title on small screens --- website/_includes/_navigation.jade | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/_includes/_navigation.jade b/website/_includes/_navigation.jade index beb33be4b..d319ef2c9 100644 --- a/website/_includes/_navigation.jade +++ b/website/_includes/_navigation.jade @@ -6,11 +6,11 @@ nav.c-nav.u-text.js-nav(class=landing ? "c-nav--theme" : null) a(href='/') #[+logo] if SUBSECTION != "index" - .u-text-label.u-padding-small=SUBSECTION + .u-text-label.u-padding-small.u-hidden-xs=SUBSECTION ul.c-nav__menu each url, item in NAVIGATION - li.c-nav__menu__item + li.c-nav__menu__item(class=(url == "/") ? 
"u-hidden-xs" : null) +a(url)=item li.c-nav__menu__item From 09d7f26bed6af9fa9d76242995ad0e1ac00785af Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 26 Mar 2017 11:14:16 +0200 Subject: [PATCH 03/12] Remove text shadow on selected text --- website/assets/css/_base/_layout.sass | 1 + 1 file changed, 1 insertion(+) diff --git a/website/assets/css/_base/_layout.sass b/website/assets/css/_base/_layout.sass index 3f680bdc2..8828651c6 100644 --- a/website/assets/css/_base/_layout.sass +++ b/website/assets/css/_base/_layout.sass @@ -34,3 +34,4 @@ main aside a ::selection background: $color-theme color: $color-back + text-shadow: none From 88160f5daa7736a7cca0cb0c2e7edcd88bed847b Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 26 Mar 2017 11:14:30 +0200 Subject: [PATCH 04/12] Update border style on infoboxes --- website/assets/css/_base/_objects.sass | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/assets/css/_base/_objects.sass b/website/assets/css/_base/_objects.sass index abd5453f4..39a8c1e86 100644 --- a/website/assets/css/_base/_objects.sass +++ b/website/assets/css/_base/_objects.sass @@ -68,7 +68,7 @@ .o-box background: $color-theme-light padding: 2rem - border: 1px solid darken($color-theme-light, 5) + border-left: 4px solid $color-theme //- Icons From a9368b591aa131e0563bf71d3f821a78fc6c8c61 Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 26 Mar 2017 11:14:36 +0200 Subject: [PATCH 05/12] Update inline code style --- website/assets/css/_components/_code.sass | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/website/assets/css/_components/_code.sass b/website/assets/css/_components/_code.sass index 9997f5e02..fd7c20e33 100644 --- a/website/assets/css/_components/_code.sass +++ b/website/assets/css/_components/_code.sass @@ -26,15 +26,19 @@ *:not(.c-code-block) > code font: normal 600 0.8em/#{1} $font-code - background: $color-subtle-light + background: rgba($color-front, 0.05) + box-shadow: 1px 1px 0 rgba($color-front, 0.1) 
+ text-shadow: 1px 1px 0 rgba($color-back, 0.5) color: $color-front padding: 0.1em 0.5em margin: 0 border-radius: 1px + box-decoration-break: clone .c-aside__content & background: lighten($color-front, 10) color: $color-back + text-shadow: none //- Syntax Highlighting From 1dae97b2f6e39be2810ddeaafdca0a1c138c87dc Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 26 Mar 2017 11:14:44 +0200 Subject: [PATCH 06/12] Fix typos --- website/docs/index.jade | 2 +- website/docs/usage/training.jade | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/website/docs/index.jade b/website/docs/index.jade index c19602002..d5a8c6deb 100644 --- a/website/docs/index.jade +++ b/website/docs/index.jade @@ -5,7 +5,7 @@ include ../_includes/_mixins +aside("Help us improve the docs") | Did you spot a mistake or come across explanations that | are unclear? You can find a "Suggest edits" button at the - | bottom at each page that points you to the source. + | bottom of each page that points you to the source. | We always appreciate | #[+a(gh("spaCy") + "/pulls") pull requests].#[br]#[br] | Have you built something cool with spaCy, or did you diff --git a/website/docs/usage/training.jade b/website/docs/usage/training.jade index da452ac83..39f524829 100644 --- a/website/docs/usage/training.jade +++ b/website/docs/usage/training.jade @@ -82,7 +82,7 @@ p | conjunction features out of the atomic predictors. Let's say you have | two atomic predictors asking, "What is the part-of-speech of the | previous token?", and "What is the part-of-speech of the previous - | previous token?". These ppredictors will introduce a number of features, + | previous token?". These predictors will introduce a number of features, | e.g. #[code Prev-pos=NN], #[code Prev-pos=VBZ], etc. A conjunction | template introduces features such as #[code Prev-pos=NN&Prev-pos=VBZ]. 
From 0fc56e2544d03a2df52bdc03533f434dfa2cc381 Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 26 Mar 2017 11:42:11 +0200 Subject: [PATCH 07/12] Update flag and defaults --- spacy/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/__main__.py b/spacy/__main__.py index 36dbb93d6..c4dc31d73 100644 --- a/spacy/__main__.py +++ b/spacy/__main__.py @@ -84,9 +84,9 @@ class CLI(object): n_iter=("number of iterations", "flag", "n", int), tagger=("train tagger", "flag", "t", bool), parser=("train parser", "flag", "p", bool), - ner=("train NER", "flag", "n", bool) + ner=("train NER", "flag", "e", bool) ) - def train(self, lang, output_dir, train_data, dev_data, n_iter=15, tagger=True, + def train(self, lang, output_dir, train_data, dev_data=None, n_iter=15, tagger=True, parser=True, ner=True): """Train a model.""" From 53cf2f1c0ed1adb5b14c23ebc05142e4b50dac70 Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 26 Mar 2017 11:48:17 +0200 Subject: [PATCH 08/12] Make dev data optional --- spacy/cli/train.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index 58c30baf2..896868419 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -16,7 +16,7 @@ from .. 
import util def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ner): output_path = Path(output_dir) train_path = Path(train_data) - dev_path = Path(dev_data) + dev_path = Path(dev_data) if dev_data else None check_dirs(output_path, data_path, dev_path) lang = util.get_lang_class(language) @@ -26,12 +26,13 @@ def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ne parser_cfg['features'] = lang.Defaults.parser_features entity_cfg['features'] = lang.Defaults.entity_features gold_train = list(read_gold_json(train_path)) - gold_dev = list(read_gold_json(dev_path)) + gold_dev = list(read_gold_json(dev_path)) if dev_path else None train_model(lang, gold_train, gold_dev, output_path, tagger_cfg, parser_cfg, entity_cfg, n_iter) - scorer = evaluate(lang, list(read_gold_json(dev_loc)), output_path) - print_results(scorer) + if gold_dev: + scorer = evaluate(lang, gold_dev, output_path) + print_results(scorer) def train_config(config): @@ -54,7 +55,7 @@ def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_ for itn, epoch in enumerate(trainer.epochs(n_iter, augment_data=None)): for doc, gold in epoch: trainer.update(doc, gold) - dev_scores = trainer.evaluate(dev_data) + dev_scores = trainer.evaluate(dev_data) if dev_data else [] print_progress(itn, trainer.nlp.parser.model.nr_weight, trainer.nlp.parser.model.nr_active_feat, **dev_scores.scores) @@ -82,8 +83,10 @@ def evaluate(Language, gold_tuples, output_path): def check_dirs(input_path, train_path, dev_path): if not output_path.exists(): util.sys_exit(output_path.as_posix(), title="Output directory not found") - if not train_path.exists() and train_path.is_file(): + if not train_path.exists() or not train_path.is_file(): util.sys_exit(train_path.as_posix(), title="Training data not found") + if dev_path and not dev_path.exists(): + util.sys_exit(dev_path.as_posix(), title="Development data not found") def print_progress(itn, nr_weight, 
nr_active_feat, **scores): From d4a86b63943b0c82bafaaaeabbcfe6fa6a89c06f Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 26 Mar 2017 13:42:19 +0200 Subject: [PATCH 09/12] Update formatting --- website/docs/usage/cli.jade | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/usage/cli.jade b/website/docs/usage/cli.jade index 66be83923..cc07c18ea 100644 --- a/website/docs/usage/cli.jade +++ b/website/docs/usage/cli.jade @@ -92,7 +92,7 @@ p +row +cell #[code model] +cell positional - +cell Shortcut link of model #[strong (optional)]. + +cell Shortcut link of model (optional). +row +cell #[code --markdown], #[code -md] From 9a481c9f4282e4ff6f20a7298c5e946274eae325 Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 26 Mar 2017 13:42:36 +0200 Subject: [PATCH 10/12] Add "Troubleshooting" section --- website/docs/usage/_data.json | 6 + website/docs/usage/troubleshooting.jade | 163 ++++++++++++++++++++++++ 2 files changed, 169 insertions(+) create mode 100644 website/docs/usage/troubleshooting.jade diff --git a/website/docs/usage/_data.json b/website/docs/usage/_data.json index b0857762b..c219585f9 100644 --- a/website/docs/usage/_data.json +++ b/website/docs/usage/_data.json @@ -5,6 +5,7 @@ "Models": "models", "Lightning tour": "lightning-tour", "Command line": "cli", + "Troubleshooting": "troubleshooting", "Resources": "resources" }, "Workflows": { @@ -45,6 +46,11 @@ "cli": { "title": "Command Line Interface", + "next": "troubleshooting" + }, + + "troubleshooting": { + "title": "Troubleshooting", "next": "resources" }, diff --git a/website/docs/usage/troubleshooting.jade b/website/docs/usage/troubleshooting.jade new file mode 100644 index 000000000..06454b055 --- /dev/null +++ b/website/docs/usage/troubleshooting.jade @@ -0,0 +1,163 @@ +//- 💫 DOCS > USAGE > TROUBLESHOOTING + +include ../../_includes/_mixins + +p + | This section collects some of the most common errors you may come + | across when installing, loading and using spaCy, as well as their 
solutions. + ++aside("Help us improve this guide") + | Did you come across a problem like the ones listed here and want to + | share the solution? You can find the "Suggest edits" button at the + | bottom of this page that points you to the source. We always + | appreciate #[+a(gh("spaCy") + "/pulls") pull requests]! + ++h(2, "install-loading") Installation and loading + ++h(3, "compatible-model") No compatible model found + ++code(false, "text"). + No compatible model found for [lang] (spaCy v#{SPACY_VERSION}). + +p + | This usually means that the model you're trying to download does not + | exist, or isn't available for your version of spaCy. + ++infobox("Solutions") + | Check the #[+a(gh("spacy-models", "compatibility.json")) compatibility table] + | to see which models are available for your spaCy version. If you're using + | an old version, consider upgrading to the latest release. Note that while + | spaCy supports tokenization for + | #[+a("/docs/api/language-models/#alpha-support") a variety of languages], + | not all of them come with statistical models. To only use the tokenizer, + | import the language's #[code Language] class instead, for example + | #[code from spacy.fr import French]. + + ++h(3, "symlink-privilege") Symbolic link privilege not held + ++code(false, "text"). + OSError: symbolic link privilege not held + +p + | To create #[+a("/docs/usage/models/#usage") shortcut links] that let you + | load models by name, spaCy creates a symbolic link in the + | #[code spacy/data] directory. This means your user needs permission to do + | this. The above error mostly occurs when doing a system-wide installation, + | which will create the symlinks in a system directory. + ++infobox("Solutions") + | Run the #[code download] or #[code link] command as administrator, + | or use a #[code virtualenv] to install spaCy in a user directory, instead + | of doing a system-wide installation. + + ++h(3, "import-error") Import error + ++code(false, "text"). 
+ ImportError: No module named spacy + +p + | This error means that the spaCy module can't be located on your system, or in + | your environment. + +infobox("Solutions") + | Make sure you have spaCy installed. If you're using a #[code virtualenv], + | make sure it's activated and check that spaCy is installed in that + | environment – otherwise, you're trying to load a system installation. You + | can also run #[code which python] to find out where your Python + | executable is located. + +h(3, "import-error-models") Import error: models + +code(false, "text"). + ImportError: No module named 'en_core_web_sm' + +p + | As of spaCy v1.7, all models can be installed as Python packages. This means + | that they'll become importable modules of your application. When creating + | #[+a("/docs/usage/models/#usage") shortcut links], spaCy will also try + | to import the model to load its meta data. If this fails, it's usually a + | sign that the package is not installed in the current environment. + +infobox("Solutions") + | Run #[code pip list] or #[code pip freeze] to check which model packages + | you have installed, and install the + | #[+a("/docs/usage/models#available") correct models] if necessary. If you're + | importing a model manually at the top of a file, make sure to use the name + | of the package, not the shortcut link you've created. + +h(3, "vocab-strings") File not found: vocab/strings.json + +code(false, "text"). + FileNotFoundError: No such file or directory: [...]/vocab/strings.json + +p + | This error may occur when using #[code spacy.load()] to load + | a language model – either because you haven't set up a + | #[+a("/docs/usage/models/#usage") shortcut link] for it, or because it + | doesn't actually exist. + +infobox("Solutions") + | Set up a #[+a("/docs/usage/models/#usage") shortcut link] for the model + | you want to load. This can either be an installed model package, or a + | local directory containing the model data. 
If you want to use one of the + | #[+a("/docs/api/language-models/#alpha-support") alpha tokenizers] for + | languages that don't yet have a statistical model, you should import its + | #[code Language] class instead, for example + | #[code from spacy.fr import French]. + +h(3, "command-not-found") Command not found + +code(false, "text"). + command not found: spacy + +p + | This error may occur when running the #[code spacy] command from the + | command line. spaCy does not currently add an entry to your #[code PATH] + | environment variable, as this can lead to unexpected results, especially + | when using #[code virtualenv]. Instead, commands need to be prefixed with + | #[code python -m]. + +infobox("Solution") + | Run the command with #[code python -m], for example + | #[code python -m spacy download en]. For more info on this, see the + | #[+a("/docs/usage/cli") CLI documentation]. + +h(2, "usage") Using spaCy + +h(3, "pos-lemma-number") POS tag or lemma is returned as number + +code. + doc = nlp(u'This is text.') + print([word.pos for word in doc]) + # [88, 98, 90, 95] + +p + | Like many NLP libraries, spaCy encodes all strings to integers. This + | reduces memory usage and improves efficiency. The integer mapping also + | makes it easy to interoperate with numpy. To access the string + | representation instead of the integer ID, add an underscore #[code _] + | after the attribute. + +infobox("Solutions") + | Use #[code pos_] or #[code lemma_] instead. See the + | #[+api("token#attributes") #[code Token] attributes] for a list of available + | attributes and their string representations. + + +h(3, "pron-lemma") Pronoun lemma is returned as #[code -PRON-] + +code. + doc = nlp(u'They are') + print(doc[0].lemma_) + # -PRON- + +p + | This is in fact expected behaviour and not a bug. + | Unlike verbs and common nouns, there's no clear base form of a personal + | pronoun. 
Should the lemma of "me" be "I", or should we normalize person + | as well, giving "it" — or maybe "he"? spaCy's solution is to introduce a + | novel symbol, #[code -PRON-], which is used as the lemma for + | all personal pronouns. For more info on this, see the + | #[+api("annotation#lemmatization") annotation specs] on lemmatization. From 45d03ea05bf2ee4bdfb81d8329f966e9d3774187 Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 26 Mar 2017 14:11:22 +0200 Subject: [PATCH 11/12] Add BADGES settings and mixin for pip and conda badges --- website/_harp.json | 11 ++++++++++- website/_includes/_mixins-base.jade | 11 +++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/website/_harp.json b/website/_harp.json index 2cbfe538b..82e82093e 100644 --- a/website/_harp.json +++ b/website/_harp.json @@ -63,7 +63,16 @@ "user": "spacy.us12", "id": "83b0498b1e7fa3c91ce68c3f1", "list": "89ad33e698" + }, + "BADGES": { + "pipy": { + "badge": "https://img.shields.io/pypi/v/spacy.svg?style=flat-square", + "link": "https://pypi.python.org/pypi/spacy" + }, + "conda": { + "badge": "https://anaconda.org/conda-forge/spacy/badges/version.svg", + "link": "https://anaconda.org/conda-forge/spacy" + } } - } } diff --git a/website/_includes/_mixins-base.jade b/website/_includes/_mixins-base.jade index ea4d2964d..80aa98a00 100644 --- a/website/_includes/_mixins-base.jade +++ b/website/_includes/_mixins-base.jade @@ -110,6 +110,17 @@ mixin gitter(button, label) !=button +//- Badge + name - [string] "pipy" or "conda" + +mixin badge(name) + - site = BADGES[name] + + if site + +a(site.link).u-padding-small + img(src=site.badge alt="{name} version" height="20") + + //- Logo mixin logo() From afd839f64b6a9f05ada27950d6cdcabe2c1cf432 Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 26 Mar 2017 14:11:31 +0200 Subject: [PATCH 12/12] Add pip and conda badges to installation docs --- website/docs/usage/index.jade | 2 ++ 1 file changed, 2 insertions(+) diff --git a/website/docs/usage/index.jade 
b/website/docs/usage/index.jade index ee2e6f544..044f5fb37 100644 --- a/website/docs/usage/index.jade +++ b/website/docs/usage/index.jade @@ -23,6 +23,7 @@ p >>> nlp = spacy.load('en') +h(2, "pip") pip + +badge("pipy") p Using pip, spaCy releases are currently only available as source packages. @@ -39,6 +40,7 @@ p pip install spacy +h(2, "conda") conda + +badge("conda") p | Thanks to our great community, we've finally re-added conda support. You