Merge CLI changes

2026-03-06 12:51:26 +03:00 · 2017-03-26 07:30:45 -05:00 · 2017-03-26 07:30:45 -05:00 · 9dcb58aaaf
commit 9dcb58aaaf
parent 6b7f7a2060 afd839f64b
13 changed files with 213 additions and 14 deletions
--- a/spacy/main.py
+++ b/spacy/main.py
@ -91,7 +91,6 @@ class CLI(object):
              parser_L1=0.0,
              no_tagger=False, no_parser=False, no_ner=False):
        """Train a model."""
-
        cli_train(lang, output_dir, train_data, dev_data, n_iter,
                  not no_tagger, not no_parser, not no_ner,
                  parser_L1)
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@ -36,12 +36,13 @@ def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ne
        'lang': language,
        'features': lang.Defaults.tagger_features}
    gold_train = list(read_gold_json(train_path))
-    gold_dev = list(read_gold_json(dev_path))
+    gold_dev = list(read_gold_json(dev_path)) if dev_path else None

    train_model(lang, gold_train, gold_dev, output_path, tagger_cfg, parser_cfg,
                entity_cfg, n_iter)
-    scorer = evaluate(lang, list(read_gold_json(dev_path)), output_path)
-    print_results(scorer)
+    if gold_dev:
+        scorer = evaluate(lang, gold_dev, output_path)
+        print_results(scorer)


 def train_config(config):
@ -64,7 +65,7 @@ def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_
        for itn, epoch in enumerate(trainer.epochs(n_iter, augment_data=None)):
            for doc, gold in epoch:
                trainer.update(doc, gold)
-            dev_scores = trainer.evaluate(dev_data)
+            dev_scores = trainer.evaluate(dev_data) if dev_data else []
            print_progress(itn, trainer.nlp.parser.model.nr_weight,
                           trainer.nlp.parser.model.nr_active_feat,
                           **dev_scores.scores)
@ -92,8 +93,10 @@ def evaluate(Language, gold_tuples, output_path):
 def check_dirs(output_path, train_path, dev_path):
    if not output_path.exists():
        util.sys_exit(output_path.as_posix(), title="Output directory not found")
-    if not train_path.exists() and train_path.is_file():
+    if not train_path.exists() or not train_path.is_file():
        util.sys_exit(train_path.as_posix(), title="Training data not found")
+    if dev_path and not dev_path.exists():
+        util.sys_exit(dev_path.as_posix(), title="Development data not found")


 def print_progress(itn, nr_weight, nr_active_feat, **scores):
--- a/website/_harp.json
+++ b/website/_harp.json
@ -28,7 +28,8 @@

        "NAVIGATION": {
            "Home": "/",
-            "Docs": "/docs",
+            "Usage": "/docs/usage",
+            "API": "/docs/api",
            "Demos": "/docs/usage/showcase",
            "Blog": "https://explosion.ai/blog"
        },
@ -62,7 +63,16 @@
            "user": "spacy.us12",
            "id": "83b0498b1e7fa3c91ce68c3f1",
            "list": "89ad33e698"
+        },
+        "BADGES": {
+            "pipy": {
+                "badge": "https://img.shields.io/pypi/v/spacy.svg?style=flat-square",
+                "link": "https://pypi.python.org/pypi/spacy"
+            },
+            "conda": {
+                "badge": "https://anaconda.org/conda-forge/spacy/badges/version.svg",
+                "link": "https://anaconda.org/conda-forge/spacy"
+            }
        }
-
    }
 }
--- a/website/_includes/_mixins-base.jade
+++ b/website/_includes/_mixins-base.jade
@ -110,6 +110,17 @@ mixin gitter(button, label)
        !=button


+//- Badge
+    name - [string] "pipy" or "conda"
+
+mixin badge(name)
+    - site = BADGES[name]
+
+    if site
+        +a(site.link).u-padding-small
+            img(src=site.badge alt="{name} version" height="20")
+
+
 //- Logo

 mixin logo()
--- a/website/_includes/_navigation.jade
+++ b/website/_includes/_navigation.jade
@ -6,11 +6,11 @@ nav.c-nav.u-text.js-nav(class=landing ? "c-nav--theme" : null)
    a(href='/') #[+logo]

    if SUBSECTION != "index"
-        .u-text-label.u-padding-small=SUBSECTION
+        .u-text-label.u-padding-small.u-hidden-xs=SUBSECTION

    ul.c-nav__menu
        each url, item in NAVIGATION
-            li.c-nav__menu__item
+            li.c-nav__menu__item(class=(url == "/") ? "u-hidden-xs" : null)
                +a(url)=item

        li.c-nav__menu__item
--- a/website/assets/css/_base/_layout.sass
+++ b/website/assets/css/_base/_layout.sass
@ -34,3 +34,4 @@ main aside a
 ::selection
    background: $color-theme
    color: $color-back
+    text-shadow: none
--- a/website/assets/css/_base/_objects.sass
+++ b/website/assets/css/_base/_objects.sass
@ -68,7 +68,7 @@
 .o-box
    background: $color-theme-light
    padding: 2rem
-    border: 1px solid darken($color-theme-light, 5)
+    border-left: 4px solid $color-theme


 //- Icons
--- a/website/assets/css/_components/_code.sass
+++ b/website/assets/css/_components/_code.sass
@ -26,15 +26,19 @@

 *:not(.c-code-block) > code
    font: normal 600 0.8em/#{1} $font-code
-    background: $color-subtle-light
+    background: rgba($color-front, 0.05)
+    box-shadow: 1px 1px 0 rgba($color-front, 0.1)
+    text-shadow: 1px 1px 0 rgba($color-back, 0.5)
    color: $color-front
    padding: 0.1em 0.5em
    margin: 0
    border-radius: 1px
+    box-decoration-break: clone

    .c-aside__content &
        background: lighten($color-front, 10)
        color: $color-back
+        text-shadow: none


 //- Syntax Highlighting
--- a/website/docs/index.jade
+++ b/website/docs/index.jade
@ -5,7 +5,7 @@ include ../_includes/_mixins
 +aside("Help us improve the docs")
    |  Did you spot a mistake or come across explanations that
    |  are unclear? You can find a "Suggest edits" button at the
-    |  bottom at each page that points you to the source.
+    |  bottom of each page that points you to the source.
    |  We always appreciate
    |  #[+a(gh("spaCy") + "/pulls") pull requests].#[br]#[br]
    |  Have you built something cool with spaCy, or did you
--- a/website/docs/usage/_data.json
+++ b/website/docs/usage/_data.json
@ -5,6 +5,7 @@
            "Models": "models",
            "Lightning tour": "lightning-tour",
            "Command line": "cli",
+            "Troubleshooting": "troubleshooting",
            "Resources": "resources"
        },
        "Workflows": {
@ -45,6 +46,11 @@

    "cli": {
        "title": "Command Line Interface",
+        "next": "troubleshooting"
+    },
+
+    "troubleshooting": {
+        "title": "Troubleshooting",
        "next": "resources"
    },

--- a/website/docs/usage/cli.jade
+++ b/website/docs/usage/cli.jade
@ -92,7 +92,7 @@ p
    +row
        +cell #[code model]
        +cell positional
-        +cell Shortcut link of model #[strong (optional)].
+        +cell Shortcut link of model (optional).

    +row
        +cell #[code --markdown], #[code -md]
--- a/website/docs/usage/index.jade
+++ b/website/docs/usage/index.jade
@ -23,6 +23,7 @@ p
        &gt;&gt;&gt; nlp = spacy.load('en')

 +h(2, "pip") pip
+    +badge("pipy")

 p Using pip, spaCy releases are currently only available as source packages.

@ -39,6 +40,7 @@ p
    pip install spacy

 +h(2, "conda") conda
+    +badge("conda")

 p
    |  Thanks to our great community, we've finally re-added conda support. You
--- a/website/docs/usage/troubleshooting.jade
+++ b/website/docs/usage/troubleshooting.jade
@ -0,0 +1,163 @@
+//- 💫 DOCS > USAGE > TROUBLESHOOTING
+
+include ../../_includes/_mixins
+
+p
+    |  This section collects some of the most common errors you may come
+    |  across when installing, loading and using spaCy, as well as their solutions.
+
++aside("Help us improve this guide")
+    |  Did you come across a problem like the ones listed here and want to
+    |  share the solution? You can find the "Suggest edits" button at the
+    |  bottom of this page that points you to the source. We always
+    |  appreciate #[+a(gh("spaCy") + "/pulls") pull requests]!
+
++h(2, "install-loading") Installation and loading
+
++h(3, "compatible-model") No compatible model found
+
++code(false, "text").
+    No compatible model found for [lang] (spaCy v#{SPACY_VERSION}).
+
+p
+    |  This usually means that the model you're trying to download does not
+    |  exist, or isn't available for your version of spaCy.
+
++infobox("Solutions")
+    |  Check the #[+a(gh("spacy-models", "compatibility.json")) compatibility table]
+    |  to see which models are available for your spaCy version. If you're using
+    |  an old version, consider upgrading to the latest release. Note that while
+    |  spaCy supports tokenization for
+    |  #[+a("/docs/api/language-models/#alpha-support") a variety of languages],
+    |  not all of them come with statistical models. To only use the tokenizer,
+    |  import the language's #[code Language] class instead, for example
+    |  #[code from spacy.fr import French].
+
+
++h(3, "symlink-privilege") Symbolic link privilege not held
+
++code(false, "text").
+    OSError: symbolic link privilege not held
+
+p
+    |  To create #[+a("/docs/usage/models/#usage") shortcut links] that let you
+    |  load models by name, spaCy creates a symbolic link in the
+    |  #[code spacy/data] directory. This means your user needs permission to do
+    |  this. The above error mostly occurs when doing a system-wide installation,
+    |  which will create the symlinks in a system directory.
+
++infobox("Solutions")
+    |  Run the #[code download] or #[code link] command as administrator,
+    |  or use a #[code virtualenv] to install spaCy in a user directory, instead
+    |  of doing a system-wide installation.
+
+
++h(3, "import-error") Import error
+
++code(false, "text").
+    ImportError: No module named spacy
+
+p
+    |  This error means that the spaCy module can't be located on your system, or in
+    |  your environment.
+
++infobox("Solutions")
+    |  Make sure you have spaCy installed. If you're using a #[code virtualenv],
+    |  make sure it's activated and check that spaCy is installed in that
+    |  environment – otherwise, you're trying to load a system installation. You
+    |  can also run #[code which python] to find out where your Python
+    |  executable is located.
+
++h(3, "import-error-models") Import error: models
+
++code(false, "text").
+    ImportError: No module named 'en_core_web_sm'
+
+p
+    |  As of spaCy v1.7, all models can be installed as Python packages. This means
+    |  that they'll become importable modules of your application. When creating
+    |  #[+a("/docs/usage/models/#usage") shortcut links], spaCy will also try
+    |  to import the model to load its metadata. If this fails, it's usually a
+    |  sign that the package is not installed in the current environment.
+
++infobox("Solutions")
+    |  Run #[code pip list] or #[code pip freeze] to check which model packages
+    |  you have installed, and install the
+    |  #[+a("/docs/usage/models#available") correct models] if necessary. If you're
+    |  importing a model manually at the top of a file, make sure to use the name
+    |  of the package, not the shortcut link you've created.
+
++h(3, "vocab-strings") File not found: vocab/strings.json
+
++code(false, "text").
+    FileNotFoundError: No such file or directory: [...]/vocab/strings.json
+
+p
+    |  This error may occur when using #[code spacy.load()] to load
+    |  a language model – either because you haven't set up a
+    |  #[+a("/docs/usage/models/#usage") shortcut link] for it, or because it
+    |  doesn't actually exist.
+
++infobox("Solutions")
+    |  Set up a #[+a("/docs/usage/models/#usage") shortcut link] for the model
+    |  you want to load. This can either be an installed model package, or a
+    |  local directory containing the model data. If you want to use one of the
+    |  #[+a("/docs/api/language-models/#alpha-support") alpha tokenizers] for
+    |  languages that don't yet have a statistical model, you should import its
+    |  #[code Language] class instead, for example
+    |  #[code from spacy.fr import French].
+
++h(3, "command-not-found") Command not found
+
++code(false, "text").
+    command not found: spacy
+
+p
+    |  This error may occur when running the #[code spacy] command from the
+    |  command line. spaCy does not currently add an entry to your #[code PATH]
+    |  environment variable, as this can lead to unexpected results, especially
+    |  when using #[code virtualenv]. Instead, commands need to be prefixed with
+    |  #[code python -m].
+
++infobox("Solution")
+    |  Run the command with #[code python -m], for example
+    |  #[code python -m spacy download en]. For more info on this, see the
+    |  #[+a("/docs/usage/cli") CLI documentation].
+
++h(2, "usage") Using spaCy
+
++h(3, "pos-lemma-number") POS tag or lemma is returned as number
+
++code.
+    doc = nlp(u'This is text.')
+    print([word.pos for word in doc])
+    # [88, 98, 90, 95]
+
+p
+    |  Like many NLP libraries, spaCy encodes all strings to integers. This
+    |  reduces memory usage and improves efficiency. The integer mapping also
+    |  makes it easy to interoperate with numpy. To access the string
+    |  representation instead of the integer ID, add an underscore #[code _]
+    |  after the attribute.
+
++infobox("Solutions")
+    |  Use #[code pos_] or #[code lemma_] instead. See the
+    |  #[+api("token#attributes") #[code Token] attributes] for a list of available
+    |  attributes and their string representations.
+
+
++h(3, "pron-lemma") Pronoun lemma is returned as #[code -PRON-]
+
++code.
+    doc = nlp(u'They are')
+    print(doc[0].lemma_)
+    # -PRON-
+
+p
+    |  This is in fact expected behaviour and not a bug.
+    |  Unlike verbs and common nouns, there's no clear base form of a personal
+    |  pronoun. Should the lemma of "me" be "I", or should we normalize person
+    |  as well, giving "it" — or maybe "he"? spaCy's solution is to introduce a
+    |  novel symbol, #[code -PRON-], which is used as the lemma for
+    |  all personal pronouns. For more info on this, see the
+    |  #[+api("annotation#lemmatization") annotation specs] on lemmatization.