Update errors

ines 2018-04-03 21:40:29 +02:00
parent f7e6313b43
commit e5f47cd82d
4 changed files with 16 additions and 15 deletions

View File

@@ -239,6 +239,11 @@ class Errors(object):
             "existing extension, set `force=True` on `{obj}.set_extension`.")
     E091 = ("Invalid extension attribute {name}: expected callable or None, "
             "but got: {value}")
+    E092 = ("Could not find or assign name for word vectors. Usually, the "
+            "name is read from the model's meta.json in vector.name. "
+            "Alternatively, it is built from the 'lang' and 'name' keys in "
+            "the meta.json. Vector names are required to avoid issue #1660.")
+    E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}")


 @add_codes
@@ -252,6 +257,10 @@ class TempErrors(object):
     T006 = ("Currently history width is hard-coded to 0. Received: {value}.")
     T007 = ("Can't yet set {attr} from Span. Vote for this feature on the "
             "issue tracker: http://github.com/explosion/spaCy/issues")
+    T008 = ("Bad configuration of Tagger. This is probably a bug within "
+            "spaCy. We changed the name of an internal attribute for loading "
+            "pre-trained vectors, and the class has been passed the old name "
+            "(pretrained_dims) but not the new name (pretrained_vectors).")


 class ModelsWarning(UserWarning):

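These templates are consumed through the add_codes decorator, which prefixes each message with its code (e.g. "[E092] ..."). A minimal sketch of that pattern, as a simplified stand-in rather than spaCy's actual implementation:

    # Simplified stand-in for the add_codes pattern: prefix each
    # message template on the class with its code.
    def add_codes(err_cls):
        for code, msg in list(vars(err_cls).items()):
            if not code.startswith('_') and isinstance(msg, str):
                setattr(err_cls, code, '[{code}] {msg}'.format(code=code, msg=msg))
        return err_cls

    @add_codes
    class Errors(object):
        E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}")

    # Call sites fill the placeholders at raise time:
    # raise ValueError(Errors.E093.format(seq='London|I Berlin|B'))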
View File

@@ -707,7 +707,7 @@ def _fix_pretrained_vectors_name(nlp):
         vectors_name = '%s_%s.vectors' % (nlp.meta['lang'], nlp.meta['name'])
         nlp.vocab.vectors.name = vectors_name
     else:
-        raise ValueError("Unnamed vectors")
+        raise ValueError(Errors.E092)
     for name, proc in nlp.pipeline:
         if not hasattr(proc, 'cfg'):
             continue

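The else branch being changed is the last resort of a fallback chain that derives a vectors name from the model's meta.json, as described by E092. A standalone sketch of that logic, where `meta` is a hypothetical stand-in for the loaded meta.json (the real function mutates nlp.vocab.vectors.name in place):

    # Sketch of the naming fallback implied by the context lines above.
    def resolve_vectors_name(meta):
        if meta.get('vectors', {}).get('name'):
            return meta['vectors']['name']  # explicit name stored in meta.json
        if 'lang' in meta and 'name' in meta:
            # Built from the 'lang' and 'name' keys, as in the hunk above
            return '%s_%s.vectors' % (meta['lang'], meta['name'])
        raise ValueError('[E092] Could not find or assign name for word vectors.')

    print(resolve_vectors_name({'lang': 'en', 'name': 'core_web_sm'}))
    # en_core_web_sm.vectors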
View File

@@ -207,7 +207,7 @@ class Pipe(object):
         def load_model(b):
             # TODO: Remove this once we don't have to handle previous models
             if 'pretrained_dims' in self.cfg and 'pretrained_vectors' not in self.cfg:
-                self.cfg['pretrained_vectors'] = self.vocab.vectors.name
+                self.cfg['pretrained_vectors'] = self.vocab.vectors.name
             if self.model is True:
                 self.model = self.Model(**self.cfg)
                 self.model.from_bytes(b)
@@ -234,7 +234,7 @@ class Pipe(object):
         def load_model(p):
             # TODO: Remove this once we don't have to handle previous models
             if 'pretrained_dims' in self.cfg and 'pretrained_vectors' not in self.cfg:
-                self.cfg['pretrained_vectors'] = self.vocab.vectors.name
+                self.cfg['pretrained_vectors'] = self.vocab.vectors.name
             if self.model is True:
                 self.model = self.Model(**self.cfg)
                 self.model.from_bytes(p.open('rb').read())
@@ -531,12 +531,7 @@ class Tagger(Pipe):
     @classmethod
     def Model(cls, n_tags, **cfg):
         if cfg.get('pretrained_dims') and not cfg.get('pretrained_vectors'):
-            raise ValueError(
-                "Bad configuration of Tagger --- this is probably a bug "
-                "within spaCy. We changed the name of an internal attribute "
-                "for loading pre-trained vectors, and the class has been "
-                "passed the old name (pretrained_dims) but not the new name "
-                "(pretrained_vectors)")
+            raise ValueError(TempErrors.T008)
         return build_tagger_model(n_tags, **cfg)

     def add_label(self, label, values=None):
@@ -584,8 +579,8 @@ class Tagger(Pipe):
         def load_model(b):
             # TODO: Remove this once we don't have to handle previous models
             if 'pretrained_dims' in self.cfg and 'pretrained_vectors' not in self.cfg:
-                self.cfg['pretrained_vectors'] = self.vocab.vectors.name
+                self.cfg['pretrained_vectors'] = self.vocab.vectors.name
             if self.model is True:
                 token_vector_width = util.env_opt(
                     'token_vector_width',

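All three load_model shims apply the same backward-compatibility rule, which T008 describes: configs serialized by older models carry pretrained_dims, while the model factory now expects pretrained_vectors. An isolated sketch of that rule (the function name and arguments are illustrative):

    # Sketch of the backward-compatibility shim repeated in each
    # load_model above: map the old config key to the new one, using
    # the vocab's vectors name as the value.
    def fix_legacy_cfg(cfg, vectors_name):
        if 'pretrained_dims' in cfg and 'pretrained_vectors' not in cfg:
            cfg['pretrained_vectors'] = vectors_name
        return cfg

    cfg = fix_legacy_cfg({'pretrained_dims': 300}, 'en_core_web_lg.vectors')
    assert cfg['pretrained_vectors'] == 'en_core_web_lg.vectors'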
View File

@@ -430,10 +430,7 @@ cdef class Doc:
                 if token.ent_iob == 1:
                     if start == -1:
                         seq = ['%s|%s' % (t.text, t.ent_iob_) for t in self[i-5:i+5]]
-                        raise ValueError(
-                            "token.ent_iob values make invalid sequence: "
-                            "I without B\n"
-                            "{seq}".format(seq=' '.join(seq)))
+                        raise ValueError(Errors.E093.format(seq=' '.join(seq)))
                 elif token.ent_iob == 2 or token.ent_iob == 0:
                     if start != -1:
                         output.append(Span(self, start, i, label=label))
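
E093 enforces the usual IOB invariant: an I (inside) tag is only valid after a B (begin) or another I. A minimal standalone sketch of the same check, using string tags instead of the numeric ent_iob values (1 = I, 2 = O, 3 = B, 0 = unset):

    # Sketch of the invariant behind E093, with string tags for clarity.
    def check_iob(tags):
        prev = 'O'
        for i, tag in enumerate(tags):
            if tag == 'I' and prev not in ('B', 'I'):
                # Mirror the hunk above: show a window around the bad tag
                seq = ' '.join(tags[max(i - 5, 0):i + 5])
                raise ValueError('[E093] invalid sequence: I without B\n' + seq)
            prev = tag

    check_iob(['O', 'B', 'I', 'O'])   # valid
    # check_iob(['O', 'I'])           # raises: I without B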