Merge branch 'master' of ssh://github.com/spacy-io/spaCy

Matthew Honnibal 2016-09-21 12:08:25 +02:00
commit 6b202ec43f
4 changed files with 11 additions and 10 deletions

View File

@@ -5,7 +5,7 @@ cymem>=1.30,<1.32
 preshed>=0.46.1,<0.47.0
 thinc>=5.0.0,<5.1.0
 murmurhash>=0.26,<0.27
-plac
+plac<0.9.3
 six
 ujson
 cloudpickle

View File

@@ -9,15 +9,16 @@ from sputnik.package_list import (PackageNotFoundException,
 from . import about


-def download(lang, force=False):
+def download(lang, force=False, fail_on_exist=True):
     if force:
         sputnik.purge(about.__title__, about.__version__)

     try:
         sputnik.package(about.__title__, about.__version__, about.__models__[lang])
-        print("Model already installed. Please run 'python -m "
-              "spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
-        sys.exit(1)
+        if fail_on_exist:
+            print("Model already installed. Please run 'python -m "
+                  "spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
+            sys.exit(0)
     except (PackageNotFoundException, CompatiblePackageNotFoundException):
         pass
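
In effect, the new fail_on_exist flag lets a caller treat an already-installed model as a no-op instead of a fatal condition, and the exit status for that case changes from 1 to 0. A minimal sketch of how a caller might use it, assuming this hunk is spacy/download.py and using 'en' purely for illustration:

    # Hedged sketch; the import path is a guess based on the hunk above.
    from spacy.download import download

    # With fail_on_exist=False an already-installed model no longer prints the
    # "Model already installed" message or exits, so an installer or test
    # harness can call this unconditionally before loading the pipeline.
    download('en', force=False, fail_on_exist=False)

    # The default (fail_on_exist=True) keeps the message for interactive use,
    # but the process now exits with status 0 instead of 1.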

View File

@@ -37,7 +37,7 @@ p Here's how the current pre-processing function looks, at the time of writing.
         # Merge them into single tokens
         ent.merge(ent.root.tag_, ent.text, ent.label_)
     token_strings = []
-    for token in tokens:
+    for token in doc:
         text = token.text.replace(' ', '_')
         tag = token.ent_type_ or token.pos_
         token_strings.append('%s|%s' % (text, tag))
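
The fix here replaces the undefined name `tokens` with the processed Doc itself. For context, a self-contained sketch of the surrounding function, reconstructed from the visible context lines (the name transform_doc is an assumption, not taken from the docs page):

    def transform_doc(doc):
        # Merge each entity span into a single token, keeping the root's tag.
        for ent in doc.ents:
            ent.merge(ent.root.tag_, ent.text, ent.label_)
        token_strings = []
        # The fix: iterate over the Doc itself rather than an undefined `tokens`.
        for token in doc:
            text = token.text.replace(' ', '_')
            tag = token.ent_type_ or token.pos_
            token_strings.append('%s|%s' % (text, tag))
        return ' '.join(token_strings)

    # Example usage: transform_doc(nlp(u'Apple is looking at buying a U.K. startup'))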

View File

@@ -46,13 +46,13 @@
     assert token.shape_ == 'Xxxxx'

     for lexeme in nlp.vocab:
         if lexeme.is_alpha:
-            lexeme.shape_ = 'W'
+            lexeme.shape_ = u'W'
         elif lexeme.is_digit:
-            lexeme.shape_ = 'D'
+            lexeme.shape_ = u'D'
         elif lexeme.is_punct:
-            lexeme.shape_ = 'P'
+            lexeme.shape_ = u'P'
         else:
-            lexeme.shape_ = 'M'
+            lexeme.shape_ = u'M'
     assert token.shape_ == 'W'

 +h3('examples-numpy-arrays') Export to numpy arrays
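
The only change in this hunk is the u'' prefix on the literals: under Python 2 a bare 'W' is a byte string, while spaCy's writable string attributes such as shape_ expect unicode text. A short illustration of the same effect, assuming the nlp object from the surrounding docs example:

    # Importing unicode_literals is an alternative to prefixing each literal.
    from __future__ import unicode_literals

    for lexeme in nlp.vocab:
        # 'W' is now unicode on Python 2 as well, matching what the
        # writable .shape_ attribute expects.
        lexeme.shape_ = 'W' if lexeme.is_alpha else 'M'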