Update errors

ines 2018-04-03 21:40:29 +02:00
parent f7e6313b43
commit e5f47cd82d
4 changed files with 16 additions and 15 deletions

View File

@@ -239,6 +239,11 @@ class Errors(object):
             "existing extension, set `force=True` on `{obj}.set_extension`.")
     E091 = ("Invalid extension attribute {name}: expected callable or None, "
             "but got: {value}")
+    E092 = ("Could not find or assign name for word vectors. Usually, the "
+            "name is read from the model's meta.json in vector.name. "
+            "Alternatively, it is built from the 'lang' and 'name' keys in "
+            "the meta.json. Vector names are required to avoid issue #1660.")
+    E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}")


 @add_codes
@@ -252,6 +257,10 @@ class TempErrors(object):
     T006 = ("Currently history width is hard-coded to 0. Received: {value}.")
     T007 = ("Can't yet set {attr} from Span. Vote for this feature on the "
             "issue tracker: http://github.com/explosion/spaCy/issues")
+    T008 = ("Bad configuration of Tagger. This is probably a bug within "
+            "spaCy. We changed the name of an internal attribute for loading "
+            "pre-trained vectors, and the class has been passed the old name "
+            "(pretrained_dims) but not the new name (pretrained_vectors).")


 class ModelsWarning(UserWarning):

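These templates are consumed through the add_codes decorator, which prefixes each message with its code (e.g. "[E092] ..."). A minimal sketch of that pattern, as a simplified stand-in rather than spaCy's actual implementation:

    # Simplified stand-in for the add_codes pattern: prefix each
    # message template on the class with its code.
    def add_codes(err_cls):
        for code, msg in list(vars(err_cls).items()):
            if not code.startswith('_') and isinstance(msg, str):
                setattr(err_cls, code, '[{code}] {msg}'.format(code=code, msg=msg))
        return err_cls

    @add_codes
    class Errors(object):
        E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}")

    # Call sites fill the placeholders at raise time:
    # raise ValueError(Errors.E093.format(seq='London|I Berlin|B'))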
View File

@@ -707,7 +707,7 @@ def _fix_pretrained_vectors_name(nlp):
         vectors_name = '%s_%s.vectors' % (nlp.meta['lang'], nlp.meta['name'])
         nlp.vocab.vectors.name = vectors_name
     else:
-        raise ValueError("Unnamed vectors")
+        raise ValueError(Errors.E092)
     for name, proc in nlp.pipeline:
         if not hasattr(proc, 'cfg'):
             continue

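The else branch being changed is the last resort of a fallback chain that derives a vectors name from the model's meta.json, as described by E092. A standalone sketch of that logic, where `meta` is a hypothetical stand-in for the loaded meta.json (the real function mutates nlp.vocab.vectors.name in place):

    # Sketch of the naming fallback implied by the context lines above.
    def resolve_vectors_name(meta):
        if meta.get('vectors', {}).get('name'):
            return meta['vectors']['name']  # explicit name stored in meta.json
        if 'lang' in meta and 'name' in meta:
            # Built from the 'lang' and 'name' keys, as in the hunk above
            return '%s_%s.vectors' % (meta['lang'], meta['name'])
        raise ValueError('[E092] Could not find or assign name for word vectors.')

    print(resolve_vectors_name({'lang': 'en', 'name': 'core_web_sm'}))
    # en_core_web_sm.vectors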
View File

@@ -207,7 +207,7 @@ class Pipe(object):
         def load_model(b):
             # TODO: Remove this once we don't have to handle previous models
             if 'pretrained_dims' in self.cfg and 'pretrained_vectors' not in self.cfg:
-                self.cfg['pretrained_vectors'] = self.vocab.vectors.name
+                self.cfg['pretrained_vectors'] = self.vocab.vectors.name
             if self.model is True:
                 self.model = self.Model(**self.cfg)
                 self.model.from_bytes(b)
@@ -234,7 +234,7 @@ class Pipe(object):
         def load_model(p):
             # TODO: Remove this once we don't have to handle previous models
             if 'pretrained_dims' in self.cfg and 'pretrained_vectors' not in self.cfg:
-                self.cfg['pretrained_vectors'] = self.vocab.vectors.name
+                self.cfg['pretrained_vectors'] = self.vocab.vectors.name
             if self.model is True:
                 self.model = self.Model(**self.cfg)
                 self.model.from_bytes(p.open('rb').read())
@@ -531,12 +531,7 @@ class Tagger(Pipe):
     @classmethod
     def Model(cls, n_tags, **cfg):
         if cfg.get('pretrained_dims') and not cfg.get('pretrained_vectors'):
-            raise ValueError(
-                "Bad configuration of Tagger --- this is probably a bug "
-                "within spaCy. We changed the name of an internal attribute "
-                "for loading pre-trained vectors, and the class has been "
-                "passed the old name (pretrained_dims) but not the new name "
-                "(pretrained_vectors)")
+            raise ValueError(TempErrors.T008)
         return build_tagger_model(n_tags, **cfg)

     def add_label(self, label, values=None):
@@ -584,8 +579,8 @@ class Tagger(Pipe):
         def load_model(b):
             # TODO: Remove this once we don't have to handle previous models
             if 'pretrained_dims' in self.cfg and 'pretrained_vectors' not in self.cfg:
-                self.cfg['pretrained_vectors'] = self.vocab.vectors.name
+                self.cfg['pretrained_vectors'] = self.vocab.vectors.name
             if self.model is True:
                 token_vector_width = util.env_opt(
                     'token_vector_width',

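All three load_model shims apply the same backward-compatibility rule, which T008 describes: configs serialized by older models carry pretrained_dims, while the model factory now expects pretrained_vectors. An isolated sketch of that rule (the function name and arguments are illustrative):

    # Sketch of the backward-compatibility shim repeated in each
    # load_model above: map the old config key to the new one, using
    # the vocab's vectors name as the value.
    def fix_legacy_cfg(cfg, vectors_name):
        if 'pretrained_dims' in cfg and 'pretrained_vectors' not in cfg:
            cfg['pretrained_vectors'] = vectors_name
        return cfg

    cfg = fix_legacy_cfg({'pretrained_dims': 300}, 'en_core_web_lg.vectors')
    assert cfg['pretrained_vectors'] == 'en_core_web_lg.vectors'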
View File

@@ -430,10 +430,7 @@ cdef class Doc:
                 if token.ent_iob == 1:
                     if start == -1:
                         seq = ['%s|%s' % (t.text, t.ent_iob_) for t in self[i-5:i+5]]
-                        raise ValueError(
-                            "token.ent_iob values make invalid sequence: "
-                            "I without B\n"
-                            "{seq}".format(seq=' '.join(seq)))
+                        raise ValueError(Errors.E093.format(seq=' '.join(seq)))
                 elif token.ent_iob == 2 or token.ent_iob == 0:
                     if start != -1:
                         output.append(Span(self, start, i, label=label))
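
E093 enforces the usual IOB invariant: an I (inside) tag is only valid after a B (begin) or another I. A minimal standalone sketch of the same check, using string tags instead of the numeric ent_iob values (1 = I, 2 = O, 3 = B, 0 = unset):

    # Sketch of the invariant behind E093, with string tags for clarity.
    def check_iob(tags):
        prev = 'O'
        for i, tag in enumerate(tags):
            if tag == 'I' and prev not in ('B', 'I'):
                # Mirror the hunk above: show a window around the bad tag
                seq = ' '.join(tags[max(i - 5, 0):i + 5])
                raise ValueError('[E093] invalid sequence: I without B\n' + seq)
            prev = tag

    check_iob(['O', 'B', 'I', 'O'])   # valid
    # check_iob(['O', 'I'])           # raises: I without B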