diff --git a/spacy/about.py b/spacy/about.py index 0c9ced15d..34ac75ccd 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -10,7 +10,7 @@ __author__ = 'Matthew Honnibal' __email__ = 'matt@explosion.ai' __license__ = 'MIT' -__docs__ = 'https://spacy.io/docs/usage' +__docs_models__ = 'https://spacy.io/docs/usage/models' __download_url__ = 'https://github.com/explosion/spacy-models/releases/download' __compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json' __shortcuts__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/shortcuts.json' diff --git a/spacy/cli/download.py b/spacy/cli/download.py index d939d8436..b6375a908 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -28,7 +28,7 @@ def get_json(url, desc): if r.status_code != 200: prints("Couldn't fetch %s. Please find a model for your spaCy installation " "(v%s), and download it manually." % (desc, about.__version__), - about.__docs__, title="Server error (%d)" % r.status_code, exits=True) + about.__docs_models__, title="Server error (%d)" % r.status_code, exits=True) return r.json() diff --git a/spacy/deprecated.py b/spacy/deprecated.py index a0cbb12a1..3afd7f3a2 100644 --- a/spacy/deprecated.py +++ b/spacy/deprecated.py @@ -16,7 +16,8 @@ def depr_model_download(lang): """ prints("The spacy.%s.download command is now deprecated. Please use " "python -m spacy download [model name or shortcut] instead. For " - "more info, see the docs: %s." % (lang, about.__docs__), + "more info, see the documentation:" % lang, + about.__docs_models__, "Downloading default '%s' model now..." % lang, title="Warning: deprecated command") download(lang) diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 87cb23a18..0c65f101e 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -23,6 +23,7 @@ from .attrs cimport IS_QUOTE from .attrs cimport IS_LEFT_PUNCT from .attrs cimport IS_RIGHT_PUNCT from .attrs cimport IS_OOV +from . import about memset(&EMPTY_LEXEME, 0, sizeof(LexemeC)) @@ -136,10 +137,10 @@ cdef class Lexeme: cdef int length = self.vocab.vectors_length if length == 0: raise ValueError( - "Word vectors set to length 0. This may be because the " - "data is not installed. If you haven't already, run" - "\npython -m spacy download %s\n" - "to install the data." % self.vocab.lang + "Word vectors set to length 0. This may be because you " + "don't have a model installed or loaded, or because your " + "model doesn't include word vectors. For more info, see " + "the documentation: \n%s\n" % about.__docs_models__ ) vector_view = self.c.vector diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 8d8ddc9db..f9325cded 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -28,6 +28,7 @@ from ..parts_of_speech cimport CCONJ, PUNCT, NOUN, univ_pos_t from ..syntax.iterators import CHUNKERS from ..util import normalize_slice from ..compat import is_config +from .. import about DEF PADDING = 5 @@ -397,9 +398,8 @@ cdef class Doc: if not self.is_parsed: raise ValueError( "noun_chunks requires the dependency parse, which " - "requires data to be installed. If you haven't done so, run: " - "\npython -m spacy download %s\n" - "to install the data" % self.vocab.lang) + "requires data to be installed. For more info, see the " + "documentation: \n%s\n" % about.__docs_models__) # Accumulate the result before beginning to iterate over it. This prevents # the tokenisation from being changed out from under us during the iteration. # The tricky thing here is that Span accepts its tokenisation changing, @@ -430,9 +430,8 @@ cdef class Doc: if not self.is_parsed: raise ValueError( "sentence boundary detection requires the dependency parse, which " - "requires data to be installed. If you haven't done so, run: " - "\npython -m spacy download %s\n" - "to install the data" % self.vocab.lang) + "requires data to be installed. For more info, see the " + "documentation: \n%s\n" % about.__docs_models__) cdef int i start = 0 for i in range(1, self.length): diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index fb1e5c732..09927ab4c 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -16,6 +16,7 @@ from ..util import normalize_slice from ..attrs cimport IS_PUNCT, IS_SPACE from ..lexeme cimport Lexeme from ..compat import is_config +from .. import about cdef class Span: @@ -221,9 +222,8 @@ cdef class Span: if not self.doc.is_parsed: raise ValueError( "noun_chunks requires the dependency parse, which " - "requires data to be installed. If you haven't done so, run: " - "\npython -m spacy download %s\n" - "to install the data" % self.vocab.lang) + "requires data to be installed. For more info, see the " + "documentation: \n%s\n" % about.__docs_models__) # Accumulate the result before beginning to iterate over it. This prevents # the tokenisation from being changed out from under us during the iteration. # The tricky thing here is that Span accepts its tokenisation changing, diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index d9dc123e7..6430c9f29 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -19,6 +19,7 @@ from ..attrs cimport IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL, IS_STOP from ..attrs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER from ..attrs cimport LEMMA, POS, TAG, DEP from ..compat import is_config +from .. import about cdef class Token: @@ -230,10 +231,10 @@ cdef class Token: cdef int length = self.vocab.vectors_length if length == 0: raise ValueError( - "Word vectors set to length 0. This may be because the " - "data is not installed. If you haven't already, run" - "\npython -m spacy download %s\n" - "to install the data." % self.vocab.lang + "Word vectors set to length 0. This may be because you " + "don't have a model installed or loaded, or because your " + "model doesn't include word vectors. For more info, see " + "the documentation: \n%s\n" % about.__docs_models__ ) vector_view = self.c.lex.vector return numpy.asarray(vector_view)