mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
Raise E983 early on in docbin init (#9247)
* raise E983 early on in docbin init
* catch situation before error is raised
* add more info on the spacy debug command
This commit is contained in:
parent
effae12cbd
commit
a361df00cd
|
@ -657,7 +657,9 @@ class Errors:
|
||||||
"{nO} - cannot add any more labels.")
|
"{nO} - cannot add any more labels.")
|
||||||
E923 = ("It looks like there is no proper sample data to initialize the "
|
E923 = ("It looks like there is no proper sample data to initialize the "
|
||||||
"Model of component '{name}'. To check your input data paths and "
|
"Model of component '{name}'. To check your input data paths and "
|
||||||
"annotation, run: python -m spacy debug data config.cfg")
|
"annotation, run: python -m spacy debug data config.cfg "
|
||||||
|
"and include the same config override values you would specify "
|
||||||
|
"for the 'spacy train' command.")
|
||||||
E924 = ("The '{name}' component does not seem to be initialized properly. "
|
E924 = ("The '{name}' component does not seem to be initialized properly. "
|
||||||
"This is likely a bug in spaCy, so feel free to open an issue: "
|
"This is likely a bug in spaCy, so feel free to open an issue: "
|
||||||
"https://github.com/explosion/spaCy/issues")
|
"https://github.com/explosion/spaCy/issues")
|
||||||
|
@ -792,7 +794,7 @@ class Errors:
|
||||||
"to token boundaries.")
|
"to token boundaries.")
|
||||||
E982 = ("The `Token.ent_iob` attribute should be an integer indexing "
|
E982 = ("The `Token.ent_iob` attribute should be an integer indexing "
|
||||||
"into {values}, but found {value}.")
|
"into {values}, but found {value}.")
|
||||||
E983 = ("Invalid key for '{dict}': {key}. Available keys: "
|
E983 = ("Invalid key(s) for '{dict}': {key}. Available keys: "
|
||||||
"{keys}")
|
"{keys}")
|
||||||
E984 = ("Invalid component config for '{name}': component block needs either "
|
E984 = ("Invalid component config for '{name}': component block needs either "
|
||||||
"a key `factory` specifying the registered function used to "
|
"a key `factory` specifying the registered function used to "
|
||||||
|
|
|
@ -8,7 +8,7 @@ from thinc.api import NumpyOps
|
||||||
from .doc import Doc
|
from .doc import Doc
|
||||||
from ..vocab import Vocab
|
from ..vocab import Vocab
|
||||||
from ..compat import copy_reg
|
from ..compat import copy_reg
|
||||||
from ..attrs import SPACY, ORTH, intify_attr
|
from ..attrs import SPACY, ORTH, intify_attr, IDS
|
||||||
from ..errors import Errors
|
from ..errors import Errors
|
||||||
from ..util import ensure_path, SimpleFrozenList
|
from ..util import ensure_path, SimpleFrozenList
|
||||||
|
|
||||||
|
@ -64,7 +64,11 @@ class DocBin:
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/docbin#init
|
DOCS: https://spacy.io/api/docbin#init
|
||||||
"""
|
"""
|
||||||
attrs = sorted([intify_attr(attr) for attr in attrs])
|
int_attrs = [intify_attr(attr) for attr in attrs]
|
||||||
|
if None in int_attrs:
|
||||||
|
non_valid = [attr for attr in attrs if intify_attr(attr) is None]
|
||||||
|
raise KeyError(Errors.E983.format(dict="attrs", key=non_valid, keys=IDS.keys())) from None
|
||||||
|
attrs = sorted(int_attrs)
|
||||||
self.version = "0.1"
|
self.version = "0.1"
|
||||||
self.attrs = [attr for attr in attrs if attr != ORTH and attr != SPACY]
|
self.attrs = [attr for attr in attrs if attr != ORTH and attr != SPACY]
|
||||||
self.attrs.insert(0, ORTH) # Ensure ORTH is always attrs[0]
|
self.attrs.insert(0, ORTH) # Ensure ORTH is always attrs[0]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user