Tidy up errors and warnings

This commit is contained in:
Ines Montani 2020-10-04 11:16:31 +02:00
parent ff914f4e6f
commit bcd52e5486
15 changed files with 186 additions and 221 deletions

View File

@ -322,8 +322,7 @@ def git_checkout(
if dest.exists(): if dest.exists():
msg.fail("Destination of checkout must not exist", exits=1) msg.fail("Destination of checkout must not exist", exits=1)
if not dest.parent.exists(): if not dest.parent.exists():
raise IOError("Parent of destination of checkout must exist") msg.fail("Parent of destination of checkout must exist", exits=1)
if sparse and git_version >= (2, 22): if sparse and git_version >= (2, 22):
return git_sparse_checkout(repo, subpath, dest, branch) return git_sparse_checkout(repo, subpath, dest, branch)
elif sparse: elif sparse:

View File

@ -16,8 +16,6 @@ def add_codes(err_cls):
@add_codes @add_codes
class Warnings: class Warnings:
W004 = ("No text fixing enabled. Run `pip install ftfy` to enable fixing "
"using ftfy.fix_text if necessary.")
W005 = ("Doc object not parsed. This means displaCy won't be able to " W005 = ("Doc object not parsed. This means displaCy won't be able to "
"generate a dependency visualization for it. Make sure the Doc " "generate a dependency visualization for it. Make sure the Doc "
"was processed with a model that supports dependency parsing, and " "was processed with a model that supports dependency parsing, and "
@ -51,8 +49,6 @@ class Warnings:
W017 = ("Alias '{alias}' already exists in the Knowledge Base.") W017 = ("Alias '{alias}' already exists in the Knowledge Base.")
W018 = ("Entity '{entity}' already exists in the Knowledge Base - " W018 = ("Entity '{entity}' already exists in the Knowledge Base - "
"ignoring the duplicate entry.") "ignoring the duplicate entry.")
W020 = ("Unnamed vectors. This won't allow multiple vectors models to be "
"loaded. (Shape: {shape})")
W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be " W021 = ("Unexpected hash collision in PhraseMatcher. Matches may be "
"incorrect. Modify PhraseMatcher._terminal_hash to fix.") "incorrect. Modify PhraseMatcher._terminal_hash to fix.")
W024 = ("Entity '{entity}' - Alias '{alias}' combination already exists in " W024 = ("Entity '{entity}' - Alias '{alias}' combination already exists in "
@ -65,7 +61,7 @@ class Warnings:
"be more efficient to split your training data into multiple " "be more efficient to split your training data into multiple "
"smaller JSON files instead.") "smaller JSON files instead.")
W028 = ("Doc.from_array was called with a vector of type '{type}', " W028 = ("Doc.from_array was called with a vector of type '{type}', "
"but is expecting one of type 'uint64' instead. This may result " "but is expecting one of type uint64 instead. This may result "
"in problems with the vocab further on in the pipeline.") "in problems with the vocab further on in the pipeline.")
W030 = ("Some entities could not be aligned in the text \"{text}\" with " W030 = ("Some entities could not be aligned in the text \"{text}\" with "
"entities \"{entities}\". Use " "entities \"{entities}\". Use "
@ -79,18 +75,17 @@ class Warnings:
"If this is surprising, make sure you have the spacy-lookups-data " "If this is surprising, make sure you have the spacy-lookups-data "
"package installed. The languages with lexeme normalization tables " "package installed. The languages with lexeme normalization tables "
"are currently: {langs}") "are currently: {langs}")
W034 = ("Please install the package spacy-lookups-data in order to include "
"the default lexeme normalization table for the language '{lang}'.")
W035 = ('Discarding subpattern "{pattern}" due to an unrecognized ' W035 = ('Discarding subpattern "{pattern}" due to an unrecognized '
"attribute or operator.") "attribute or operator.")
# TODO: fix numbering after merging develop into master # TODO: fix numbering after merging develop into master
W088 = ("The pipeline component {name} implements a 'begin_training' " W088 = ("The pipeline component {name} implements a `begin_training` "
"method, which won't be called by spaCy. As of v3.0, 'begin_training' " "method, which won't be called by spaCy. As of v3.0, `begin_training` "
"has been renamed to 'initialize' so you likely want to rename the " "has been renamed to `initialize`, so you likely want to rename the "
"component method. See the documentation for details: " "component method. See the documentation for details: "
"https://nightly.spacy.io/api/language#initialize") "https://nightly.spacy.io/api/language#initialize")
W089 = ("The nlp.begin_training method has been renamed to nlp.initialize.") W089 = ("As of spaCy v3.0, the `nlp.begin_training` method has been renamed "
"to `nlp.initialize`.")
W090 = ("Could not locate any {format} files in path '{path}'.") W090 = ("Could not locate any {format} files in path '{path}'.")
W091 = ("Could not clean/remove the temp directory at {dir}: {msg}.") W091 = ("Could not clean/remove the temp directory at {dir}: {msg}.")
W092 = ("Ignoring annotations for sentence starts, as dependency heads are set.") W092 = ("Ignoring annotations for sentence starts, as dependency heads are set.")
@ -108,39 +103,33 @@ class Warnings:
"download a newer compatible model or retrain your custom model " "download a newer compatible model or retrain your custom model "
"with the current spaCy version. For more details and available " "with the current spaCy version. For more details and available "
"updates, run: python -m spacy validate") "updates, run: python -m spacy validate")
W096 = ("The method 'disable_pipes' has become deprecated - use 'select_pipes' " W096 = ("The method `nlp.disable_pipes` is now deprecated - use "
"instead.") "`nlp.select_pipes` instead.")
W097 = ("No Model config was provided to create the '{name}' component, "
"and no default configuration could be found either.")
W098 = ("No Model config was provided to create the '{name}' component, "
"so a default configuration was used.")
W099 = ("Expected 'dict' type for the 'model' argument of pipe '{pipe}', "
"but got '{type}' instead, so ignoring it.")
W100 = ("Skipping unsupported morphological feature(s): '{feature}'. " W100 = ("Skipping unsupported morphological feature(s): '{feature}'. "
"Provide features as a dict {{\"Field1\": \"Value1,Value2\"}} or " "Provide features as a dict {{\"Field1\": \"Value1,Value2\"}} or "
"string \"Field1=Value1,Value2|Field2=Value3\".") "string \"Field1=Value1,Value2|Field2=Value3\".")
W101 = ("Skipping `Doc` custom extension '{name}' while merging docs.") W101 = ("Skipping Doc custom extension '{name}' while merging docs.")
W102 = ("Skipping unsupported user data '{key}: {value}' while merging docs.") W102 = ("Skipping unsupported user data '{key}: {value}' while merging docs.")
W103 = ("Unknown {lang} word segmenter '{segmenter}'. Supported " W103 = ("Unknown {lang} word segmenter '{segmenter}'. Supported "
"word segmenters: {supported}. Defaulting to {default}.") "word segmenters: {supported}. Defaulting to {default}.")
W104 = ("Skipping modifications for '{target}' segmenter. The current " W104 = ("Skipping modifications for '{target}' segmenter. The current "
"segmenter is '{current}'.") "segmenter is '{current}'.")
W105 = ("As of spaCy v3.0, the {matcher}.pipe method is deprecated. If you " W105 = ("As of spaCy v3.0, the `{matcher}.pipe` method is deprecated. If you "
"need to match on a stream of documents, you can use nlp.pipe and " "need to match on a stream of documents, you can use `nlp.pipe` and "
"call the {matcher} on each Doc object.") "call the {matcher} on each Doc object.")
W107 = ("The property Doc.{prop} is deprecated. Use " W107 = ("The property `Doc.{prop}` is deprecated. Use "
"Doc.has_annotation(\"{attr}\") instead.") "`Doc.has_annotation(\"{attr}\")` instead.")
@add_codes @add_codes
class Errors: class Errors:
E001 = ("No component '{name}' found in pipeline. Available names: {opts}") E001 = ("No component '{name}' found in pipeline. Available names: {opts}")
E002 = ("Can't find factory for '{name}' for language {lang} ({lang_code}). " E002 = ("Can't find factory for '{name}' for language {lang} ({lang_code}). "
"This usually happens when spaCy calls nlp.{method} with custom " "This usually happens when spaCy calls `nlp.{method}` with custom "
"component name that's not registered on the current language class. " "component name that's not registered on the current language class. "
"If you're using a custom component, make sure you've added the " "If you're using a custom component, make sure you've added the "
"decorator @Language.component (for function components) or " "decorator `@Language.component` (for function components) or "
"@Language.factory (for class components).\n\nAvailable " "`@Language.factory` (for class components).\n\nAvailable "
"factories: {opts}") "factories: {opts}")
E003 = ("Not a valid pipeline component. Expected callable, but " E003 = ("Not a valid pipeline component. Expected callable, but "
"got {component} (name: '{name}'). If you're using a custom " "got {component} (name: '{name}'). If you're using a custom "
@ -158,14 +147,13 @@ class Errors:
E008 = ("Can't restore disabled pipeline component '{name}' because it " E008 = ("Can't restore disabled pipeline component '{name}' because it "
"doesn't exist in the pipeline anymore. If you want to remove " "doesn't exist in the pipeline anymore. If you want to remove "
"components from the pipeline, you should do it before calling " "components from the pipeline, you should do it before calling "
"`nlp.select_pipes()` or after restoring the disabled components.") "`nlp.select_pipes` or after restoring the disabled components.")
E010 = ("Word vectors set to length 0. This may be because you don't have " E010 = ("Word vectors set to length 0. This may be because you don't have "
"a model installed or loaded, or because your model doesn't " "a model installed or loaded, or because your model doesn't "
"include word vectors. For more info, see the docs:\n" "include word vectors. For more info, see the docs:\n"
"https://nightly.spacy.io/usage/models") "https://nightly.spacy.io/usage/models")
E011 = ("Unknown operator: '{op}'. Options: {opts}") E011 = ("Unknown operator: '{op}'. Options: {opts}")
E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}") E012 = ("Cannot add pattern for zero tokens to matcher.\nKey: {key}")
E014 = ("Unknown tag ID: {tag}")
E016 = ("MultitaskObjective target should be function or one of: dep, " E016 = ("MultitaskObjective target should be function or one of: dep, "
"tag, ent, dep_tag_offset, ent_tag.") "tag, ent, dep_tag_offset, ent_tag.")
E017 = ("Can only add unicode or bytes. Got type: {value_type}") E017 = ("Can only add unicode or bytes. Got type: {value_type}")
@ -181,27 +169,24 @@ class Errors:
"For example, are all labels added to the model? If you're " "For example, are all labels added to the model? If you're "
"training a named entity recognizer, also make sure that none of " "training a named entity recognizer, also make sure that none of "
"your annotated entity spans have leading or trailing whitespace " "your annotated entity spans have leading or trailing whitespace "
"or punctuation. " "or punctuation. You can also use the `debug data` command to "
"You can also use the experimental `debug data` command to "
"validate your JSON-formatted training data. For details, run:\n" "validate your JSON-formatted training data. For details, run:\n"
"python -m spacy debug data --help") "python -m spacy debug data --help")
E025 = ("String is too long: {length} characters. Max is 2**30.") E025 = ("String is too long: {length} characters. Max is 2**30.")
E026 = ("Error accessing token at position {i}: out of bounds in Doc of " E026 = ("Error accessing token at position {i}: out of bounds in Doc of "
"length {length}.") "length {length}.")
E027 = ("Arguments 'words' and 'spaces' should be sequences of the same " E027 = ("Arguments `words` and `spaces` should be sequences of the same "
"length, or 'spaces' should be left default at None. spaces " "length, or `spaces` should be left default at None. `spaces` "
"should be a sequence of booleans, with True meaning that the " "should be a sequence of booleans, with True meaning that the "
"word owns a ' ' character following it.") "word owns a ' ' character following it.")
E028 = ("orths_and_spaces expects either a list of unicode string or a " E028 = ("`words` expects a list of unicode strings, but got bytes instance: {value}")
"list of (unicode, bool) tuples. Got bytes instance: {value}") E029 = ("`noun_chunks` requires the dependency parse, which requires a "
E029 = ("noun_chunks requires the dependency parse, which requires a "
"statistical model to be installed and loaded. For more info, see " "statistical model to be installed and loaded. For more info, see "
"the documentation:\nhttps://nightly.spacy.io/usage/models") "the documentation:\nhttps://nightly.spacy.io/usage/models")
E030 = ("Sentence boundaries unset. You can add the 'sentencizer' " E030 = ("Sentence boundaries unset. You can add the 'sentencizer' "
"component to the pipeline with: " "component to the pipeline with: `nlp.add_pipe('sentencizer')`. "
"nlp.add_pipe('sentencizer'). "
"Alternatively, add the dependency parser, or set sentence " "Alternatively, add the dependency parser, or set sentence "
"boundaries by setting doc[i].is_sent_start.") "boundaries by setting `doc[i].is_sent_start`.")
E031 = ("Invalid token: empty string ('') at position {i}.") E031 = ("Invalid token: empty string ('') at position {i}.")
E033 = ("Cannot load into non-empty Doc of length {length}.") E033 = ("Cannot load into non-empty Doc of length {length}.")
E035 = ("Error creating span with start {start} and end {end} for Doc of " E035 = ("Error creating span with start {start} and end {end} for Doc of "
@ -215,7 +200,7 @@ class Errors:
"issue here: http://github.com/explosion/spaCy/issues") "issue here: http://github.com/explosion/spaCy/issues")
E040 = ("Attempt to access token at {i}, max length {max_length}.") E040 = ("Attempt to access token at {i}, max length {max_length}.")
E041 = ("Invalid comparison operator: {op}. Likely a Cython bug?") E041 = ("Invalid comparison operator: {op}. Likely a Cython bug?")
E042 = ("Error accessing doc[{i}].nbor({j}), for doc of length {length}.") E042 = ("Error accessing `doc[{i}].nbor({j})`, for doc of length {length}.")
E043 = ("Refusing to write to token.sent_start if its document is parsed, " E043 = ("Refusing to write to token.sent_start if its document is parsed, "
"because this may cause inconsistent state.") "because this may cause inconsistent state.")
E044 = ("Invalid value for token.sent_start: {value}. Must be one of: " E044 = ("Invalid value for token.sent_start: {value}. Must be one of: "
@ -235,7 +220,7 @@ class Errors:
E056 = ("Invalid tokenizer exception: ORTH values combined don't match " E056 = ("Invalid tokenizer exception: ORTH values combined don't match "
"original string.\nKey: {key}\nOrths: {orths}") "original string.\nKey: {key}\nOrths: {orths}")
E057 = ("Stepped slices not supported in Span objects. Try: " E057 = ("Stepped slices not supported in Span objects. Try: "
"list(tokens)[start:stop:step] instead.") "`list(tokens)[start:stop:step]` instead.")
E058 = ("Could not retrieve vector for key {key}.") E058 = ("Could not retrieve vector for key {key}.")
E059 = ("One (and only one) keyword arg must be set. Got: {kwargs}") E059 = ("One (and only one) keyword arg must be set. Got: {kwargs}")
E060 = ("Cannot add new key to vectors: the table is full. Current shape: " E060 = ("Cannot add new key to vectors: the table is full. Current shape: "
@ -244,7 +229,7 @@ class Errors:
"and 63 are occupied. You can replace one by specifying the " "and 63 are occupied. You can replace one by specifying the "
"`flag_id` explicitly, e.g. " "`flag_id` explicitly, e.g. "
"`nlp.vocab.add_flag(your_func, flag_id=IS_ALPHA`.") "`nlp.vocab.add_flag(your_func, flag_id=IS_ALPHA`.")
E063 = ("Invalid value for flag_id: {value}. Flag IDs must be between 1 " E063 = ("Invalid value for `flag_id`: {value}. Flag IDs must be between 1 "
"and 63 (inclusive).") "and 63 (inclusive).")
E064 = ("Error fetching a Lexeme from the Vocab. When looking up a " E064 = ("Error fetching a Lexeme from the Vocab. When looking up a "
"string, the lexeme returned had an orth ID that did not match " "string, the lexeme returned had an orth ID that did not match "
@ -273,7 +258,7 @@ class Errors:
E085 = ("Can't create lexeme for string '{string}'.") E085 = ("Can't create lexeme for string '{string}'.")
E087 = ("Unknown displaCy style: {style}.") E087 = ("Unknown displaCy style: {style}.")
E088 = ("Text of length {length} exceeds maximum of {max_length}. The " E088 = ("Text of length {length} exceeds maximum of {max_length}. The "
"v2.x parser and NER models require roughly 1GB of temporary " "parser and NER models require roughly 1GB of temporary "
"memory per 100,000 characters in the input. This means long " "memory per 100,000 characters in the input. This means long "
"texts may cause memory allocation errors. If you're not using " "texts may cause memory allocation errors. If you're not using "
"the parser or NER, it's probably safe to increase the " "the parser or NER, it's probably safe to increase the "
@ -290,8 +275,8 @@ class Errors:
E094 = ("Error reading line {line_num} in vectors file {loc}.") E094 = ("Error reading line {line_num} in vectors file {loc}.")
E095 = ("Can't write to frozen dictionary. This is likely an internal " E095 = ("Can't write to frozen dictionary. This is likely an internal "
"error. Are you writing to a default function argument?") "error. Are you writing to a default function argument?")
E096 = ("Invalid object passed to displaCy: Can only visualize Doc or " E096 = ("Invalid object passed to displaCy: Can only visualize `Doc` or "
"Span objects, or dicts if set to manual=True.") "Span objects, or dicts if set to `manual=True`.")
E097 = ("Invalid pattern: expected token pattern (list of dicts) or " E097 = ("Invalid pattern: expected token pattern (list of dicts) or "
"phrase pattern (string) but got:\n{pattern}") "phrase pattern (string) but got:\n{pattern}")
E098 = ("Invalid pattern: expected both RIGHT_ID and RIGHT_ATTRS.") E098 = ("Invalid pattern: expected both RIGHT_ID and RIGHT_ATTRS.")
@ -308,11 +293,11 @@ class Errors:
E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A " E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A "
"token can only be part of one entity, so make sure the entities " "token can only be part of one entity, so make sure the entities "
"you're setting don't overlap.") "you're setting don't overlap.")
E106 = ("Can't find doc._.{attr} attribute specified in the underscore " E106 = ("Can't find `doc._.{attr}` attribute specified in the underscore "
"settings: {opts}") "settings: {opts}")
E107 = ("Value of doc._.{attr} is not JSON-serializable: {value}") E107 = ("Value of `doc._.{attr}` is not JSON-serializable: {value}")
E109 = ("Component '{name}' could not be run. Did you forget to " E109 = ("Component '{name}' could not be run. Did you forget to "
"call initialize()?") "call `initialize()`?")
E110 = ("Invalid displaCy render wrapper. Expected callable, got: {obj}") E110 = ("Invalid displaCy render wrapper. Expected callable, got: {obj}")
E111 = ("Pickling a token is not supported, because tokens are only views " E111 = ("Pickling a token is not supported, because tokens are only views "
"of the parent Doc and can't exist on their own. A pickled token " "of the parent Doc and can't exist on their own. A pickled token "
@ -329,8 +314,8 @@ class Errors:
E117 = ("The newly split tokens must match the text of the original token. " E117 = ("The newly split tokens must match the text of the original token. "
"New orths: {new}. Old text: {old}.") "New orths: {new}. Old text: {old}.")
E118 = ("The custom extension attribute '{attr}' is not registered on the " E118 = ("The custom extension attribute '{attr}' is not registered on the "
"Token object so it can't be set during retokenization. To " "`Token` object so it can't be set during retokenization. To "
"register an attribute, use the Token.set_extension classmethod.") "register an attribute, use the `Token.set_extension` classmethod.")
E119 = ("Can't set custom extension attribute '{attr}' during " E119 = ("Can't set custom extension attribute '{attr}' during "
"retokenization because it's not writable. This usually means it " "retokenization because it's not writable. This usually means it "
"was registered with a getter function (and no setter) or as a " "was registered with a getter function (and no setter) or as a "
@ -354,7 +339,7 @@ class Errors:
E130 = ("You are running a narrow unicode build, which is incompatible " E130 = ("You are running a narrow unicode build, which is incompatible "
"with spacy >= 2.1.0. To fix this, reinstall Python and use a wide " "with spacy >= 2.1.0. To fix this, reinstall Python and use a wide "
"unicode build instead. You can also rebuild Python and set the " "unicode build instead. You can also rebuild Python and set the "
"--enable-unicode=ucs4 flag.") "`--enable-unicode=ucs4 flag`.")
E131 = ("Cannot write the kb_id of an existing Span object because a Span " E131 = ("Cannot write the kb_id of an existing Span object because a Span "
"is a read-only view of the underlying Token objects stored in " "is a read-only view of the underlying Token objects stored in "
"the Doc. Instead, create a new Span object and specify the " "the Doc. Instead, create a new Span object and specify the "
@ -367,27 +352,20 @@ class Errors:
E133 = ("The sum of prior probabilities for alias '{alias}' should not " E133 = ("The sum of prior probabilities for alias '{alias}' should not "
"exceed 1, but found {sum}.") "exceed 1, but found {sum}.")
E134 = ("Entity '{entity}' is not defined in the Knowledge Base.") E134 = ("Entity '{entity}' is not defined in the Knowledge Base.")
E137 = ("Expected 'dict' type, but got '{type}' from '{line}'. Make sure " E139 = ("Knowledge base for component '{name}' is empty. Use the methods "
"to provide a valid JSON object as input with either the `text` " "`kb.add_entity` and `kb.add_alias` to add entries.")
"or `tokens` key. For more info, see the docs:\n"
"https://nightly.spacy.io/api/cli#pretrain-jsonl")
E138 = ("Invalid JSONL format for raw text '{text}'. Make sure the input "
"includes either the `text` or `tokens` key. For more info, see "
"the docs:\nhttps://nightly.spacy.io/api/cli#pretrain-jsonl")
E139 = ("Knowledge Base for component '{name}' is empty. Use the methods "
"kb.add_entity and kb.add_alias to add entries.")
E140 = ("The list of entities, prior probabilities and entity vectors " E140 = ("The list of entities, prior probabilities and entity vectors "
"should be of equal length.") "should be of equal length.")
E141 = ("Entity vectors should be of length {required} instead of the " E141 = ("Entity vectors should be of length {required} instead of the "
"provided {found}.") "provided {found}.")
E143 = ("Labels for component '{name}' not initialized. This can be fixed " E143 = ("Labels for component '{name}' not initialized. This can be fixed "
"by calling add_label, or by providing a representative batch of " "by calling add_label, or by providing a representative batch of "
"examples to the component's initialize method.") "examples to the component's `initialize` method.")
E145 = ("Error reading `{param}` from input file.") E145 = ("Error reading `{param}` from input file.")
E146 = ("Could not access `{path}`.") E146 = ("Could not access {path}.")
E147 = ("Unexpected error in the {method} functionality of the " E147 = ("Unexpected error in the {method} functionality of the "
"EntityLinker: {msg}. This is likely a bug in spaCy, so feel free " "EntityLinker: {msg}. This is likely a bug in spaCy, so feel free "
"to open an issue.") "to open an issue: https://github.com/explosion/spaCy/issues")
E148 = ("Expected {ents} KB identifiers but got {ids}. Make sure that " E148 = ("Expected {ents} KB identifiers but got {ids}. Make sure that "
"each entity in `doc.ents` is assigned to a KB identifier.") "each entity in `doc.ents` is assigned to a KB identifier.")
E149 = ("Error deserializing model. Check that the config used to create " E149 = ("Error deserializing model. Check that the config used to create "
@ -395,18 +373,18 @@ class Errors:
E150 = ("The language of the `nlp` object and the `vocab` should be the " E150 = ("The language of the `nlp` object and the `vocab` should be the "
"same, but found '{nlp}' and '{vocab}' respectively.") "same, but found '{nlp}' and '{vocab}' respectively.")
E152 = ("The attribute {attr} is not supported for token patterns. " E152 = ("The attribute {attr} is not supported for token patterns. "
"Please use the option validate=True with Matcher, PhraseMatcher, " "Please use the option `validate=True` with the Matcher, PhraseMatcher, "
"or EntityRuler for more details.") "or EntityRuler for more details.")
E153 = ("The value type {vtype} is not supported for token patterns. " E153 = ("The value type {vtype} is not supported for token patterns. "
"Please use the option validate=True with Matcher, PhraseMatcher, " "Please use the option validate=True with Matcher, PhraseMatcher, "
"or EntityRuler for more details.") "or EntityRuler for more details.")
E154 = ("One of the attributes or values is not supported for token " E154 = ("One of the attributes or values is not supported for token "
"patterns. Please use the option validate=True with Matcher, " "patterns. Please use the option `validate=True` with the Matcher, "
"PhraseMatcher, or EntityRuler for more details.") "PhraseMatcher, or EntityRuler for more details.")
E155 = ("The pipeline needs to include a {pipe} in order to use " E155 = ("The pipeline needs to include a {pipe} in order to use "
"Matcher or PhraseMatcher with the attribute {attr}. " "Matcher or PhraseMatcher with the attribute {attr}. "
"Try using nlp() instead of nlp.make_doc() or list(nlp.pipe()) " "Try using `nlp()` instead of `nlp.make_doc()` or `list(nlp.pipe())` "
"instead of list(nlp.tokenizer.pipe()).") "instead of `list(nlp.tokenizer.pipe())`.")
E157 = ("Can't render negative values for dependency arc start or end. " E157 = ("Can't render negative values for dependency arc start or end. "
"Make sure that you're passing in absolute token indices, not " "Make sure that you're passing in absolute token indices, not "
"relative token offsets.\nstart: {start}, end: {end}, label: " "relative token offsets.\nstart: {start}, end: {end}, label: "
@ -415,13 +393,11 @@ class Errors:
E159 = ("Can't find table '{name}' in lookups. Available tables: {tables}") E159 = ("Can't find table '{name}' in lookups. Available tables: {tables}")
E160 = ("Can't find language data file: {path}") E160 = ("Can't find language data file: {path}")
E161 = ("Found an internal inconsistency when predicting entity links. " E161 = ("Found an internal inconsistency when predicting entity links. "
"This is likely a bug in spaCy, so feel free to open an issue.") "This is likely a bug in spaCy, so feel free to open an issue: "
E162 = ("Cannot evaluate textcat model on data with different labels.\n" "https://github.com/explosion/spaCy/issues")
"Labels in model: {model_labels}\nLabels in evaluation "
"data: {eval_labels}")
E163 = ("cumsum was found to be unstable: its last element does not " E163 = ("cumsum was found to be unstable: its last element does not "
"correspond to sum") "correspond to sum")
E164 = ("x is neither increasing nor decreasing: {}.") E164 = ("x is neither increasing nor decreasing: {x}.")
E165 = ("Only one class present in y_true. ROC AUC score is not defined in " E165 = ("Only one class present in y_true. ROC AUC score is not defined in "
"that case.") "that case.")
E166 = ("Can only merge DocBins with the same value for '{param}'.\n" E166 = ("Can only merge DocBins with the same value for '{param}'.\n"
@ -436,10 +412,10 @@ class Errors:
E178 = ("Each pattern should be a list of dicts, but got: {pat}. Maybe you " E178 = ("Each pattern should be a list of dicts, but got: {pat}. Maybe you "
"accidentally passed a single pattern to Matcher.add instead of a " "accidentally passed a single pattern to Matcher.add instead of a "
"list of patterns? If you only want to add one pattern, make sure " "list of patterns? If you only want to add one pattern, make sure "
"to wrap it in a list. For example: matcher.add('{key}', [pattern])") "to wrap it in a list. For example: `matcher.add('{key}', [pattern])`")
E179 = ("Invalid pattern. Expected a list of Doc objects but got a single " E179 = ("Invalid pattern. Expected a list of Doc objects but got a single "
"Doc. If you only want to add one pattern, make sure to wrap it " "Doc. If you only want to add one pattern, make sure to wrap it "
"in a list. For example: matcher.add('{key}', [doc])") "in a list. For example: `matcher.add('{key}', [doc])`")
E180 = ("Span attributes can't be declared as required or assigned by " E180 = ("Span attributes can't be declared as required or assigned by "
"components, since spans are only views of the Doc. Use Doc and " "components, since spans are only views of the Doc. Use Doc and "
"Token attributes (or custom extension attributes) only and remove " "Token attributes (or custom extension attributes) only and remove "
@ -447,17 +423,16 @@ class Errors:
E181 = ("Received invalid attributes for unkown object {obj}: {attrs}. " E181 = ("Received invalid attributes for unkown object {obj}: {attrs}. "
"Only Doc and Token attributes are supported.") "Only Doc and Token attributes are supported.")
E182 = ("Received invalid attribute declaration: {attr}\nDid you forget " E182 = ("Received invalid attribute declaration: {attr}\nDid you forget "
"to define the attribute? For example: {attr}.???") "to define the attribute? For example: `{attr}.???`")
E183 = ("Received invalid attribute declaration: {attr}\nOnly top-level " E183 = ("Received invalid attribute declaration: {attr}\nOnly top-level "
"attributes are supported, for example: {solution}") "attributes are supported, for example: {solution}")
E184 = ("Only attributes without underscores are supported in component " E184 = ("Only attributes without underscores are supported in component "
"attribute declarations (because underscore and non-underscore " "attribute declarations (because underscore and non-underscore "
"attributes are connected anyways): {attr} -> {solution}") "attributes are connected anyways): {attr} -> {solution}")
E185 = ("Received invalid attribute in component attribute declaration: " E185 = ("Received invalid attribute in component attribute declaration: "
"{obj}.{attr}\nAttribute '{attr}' does not exist on {obj}.") "`{obj}.{attr}`\nAttribute '{attr}' does not exist on {obj}.")
E186 = ("'{tok_a}' and '{tok_b}' are different texts.")
E187 = ("Only unicode strings are supported as labels.") E187 = ("Only unicode strings are supported as labels.")
E189 = ("Each argument to Doc.__init__ should be of equal length.") E189 = ("Each argument to `Doc.__init__` should be of equal length.")
E190 = ("Token head out of range in `Doc.from_array()` for token index " E190 = ("Token head out of range in `Doc.from_array()` for token index "
"'{index}' with value '{value}' (equivalent to relative head " "'{index}' with value '{value}' (equivalent to relative head "
"index: '{rel_head_index}'). The head indices should be relative " "index: '{rel_head_index}'). The head indices should be relative "
@ -471,17 +446,32 @@ class Errors:
"({curr_dim}).") "({curr_dim}).")
E194 = ("Unable to aligned mismatched text '{text}' and words '{words}'.") E194 = ("Unable to aligned mismatched text '{text}' and words '{words}'.")
E195 = ("Matcher can be called on {good} only, got {got}.") E195 = ("Matcher can be called on {good} only, got {got}.")
E196 = ("Refusing to write to token.is_sent_end. Sentence boundaries can " E196 = ("Refusing to write to `token.is_sent_end`. Sentence boundaries can "
"only be fixed with token.is_sent_start.") "only be fixed with `token.is_sent_start`.")
E197 = ("Row out of bounds, unable to add row {row} for key {key}.") E197 = ("Row out of bounds, unable to add row {row} for key {key}.")
E198 = ("Unable to return {n} most similar vectors for the current vectors " E198 = ("Unable to return {n} most similar vectors for the current vectors "
"table, which contains {n_rows} vectors.") "table, which contains {n_rows} vectors.")
E199 = ("Unable to merge 0-length span at doc[{start}:{end}].") E199 = ("Unable to merge 0-length span at `doc[{start}:{end}]`.")
E200 = ("Specifying a base model with a pretrained component '{component}' " E200 = ("Can't yet set {attr} from Span. Vote for this feature on the "
"can not be combined with adding a pretrained Tok2Vec layer.") "issue tracker: http://github.com/explosion/spaCy/issues")
E201 = ("Span index out of range.")
# TODO: fix numbering after merging develop into master # TODO: fix numbering after merging develop into master
E092 = ("The sentence-per-line IOB/IOB2 file is not formatted correctly. "
"Try checking whitespace and delimiters. See "
"https://nightly.spacy.io/api/cli#convert")
E093 = ("The token-per-line NER file is not formatted correctly. Try checking "
"whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert")
E904 = ("Cannot initialize StaticVectors layer: nO dimension unset. This "
"dimension refers to the output width, after the linear projection "
"has been applied.")
E905 = ("Cannot initialize StaticVectors layer: nM dimension unset. This "
"dimension refers to the width of the vectors table.")
E906 = ("Unexpected `loss` value in pretraining objective: {loss_type}")
E907 = ("Unexpected `objective_type` value in pretraining objective: {objective_type}")
E908 = ("Can't set `spaces` without `words` in `Doc.__init__`.")
E909 = ("Expected {name} in parser internals. This is likely a bug in spaCy.")
E910 = ("Encountered NaN value when computing loss for component '{name}'.")
E911 = ("Invalid feature: {feat}. Must be a token attribute.")
E912 = ("Failed to initialize lemmatizer. Missing lemmatizer table(s) found " E912 = ("Failed to initialize lemmatizer. Missing lemmatizer table(s) found "
"for mode '{mode}'. Required tables: {tables}. Found: {found}.") "for mode '{mode}'. Required tables: {tables}. Found: {found}.")
E913 = ("Corpus path can't be None. Maybe you forgot to define it in your " E913 = ("Corpus path can't be None. Maybe you forgot to define it in your "
@ -494,44 +484,44 @@ class Errors:
"final score, set its weight to null in the [training.score_weights] " "final score, set its weight to null in the [training.score_weights] "
"section of your training config.") "section of your training config.")
E916 = ("Can't log score for '{name}' in table: not a valid score ({score_type})") E916 = ("Can't log score for '{name}' in table: not a valid score ({score_type})")
E917 = ("Received invalid value {value} for 'state_type' in " E917 = ("Received invalid value {value} for `state_type` in "
"TransitionBasedParser: only 'parser' or 'ner' are valid options.") "TransitionBasedParser: only 'parser' or 'ner' are valid options.")
E918 = ("Received invalid value for vocab: {vocab} ({vocab_type}). Valid " E918 = ("Received invalid value for vocab: {vocab} ({vocab_type}). Valid "
"values are an instance of spacy.vocab.Vocab or True to create one" "values are an instance of `spacy.vocab.Vocab` or True to create one"
" (default).") " (default).")
E919 = ("A textcat 'positive_label' '{pos_label}' was provided for training " E919 = ("A textcat `positive_label` '{pos_label}' was provided for training "
"data that does not appear to be a binary classification problem " "data that does not appear to be a binary classification problem "
"with two labels. Labels found: {labels}") "with two labels. Labels found: {labels}")
E920 = ("The textcat's 'positive_label' setting '{pos_label}' " E920 = ("The textcat's `positive_label` setting '{pos_label}' "
"does not match any label in the training data or provided during " "does not match any label in the training data or provided during "
"initialization. Available labels: {labels}") "initialization. Available labels: {labels}")
E921 = ("The method 'set_output' can only be called on components that have " E921 = ("The method `set_output` can only be called on components that have "
"a Model with a 'resize_output' attribute. Otherwise, the output " "a Model with a `resize_output` attribute. Otherwise, the output "
"layer can not be dynamically changed.") "layer can not be dynamically changed.")
E922 = ("Component '{name}' has been initialized with an output dimension of " E922 = ("Component '{name}' has been initialized with an output dimension of "
"{nO} - cannot add any more labels.") "{nO} - cannot add any more labels.")
E923 = ("It looks like there is no proper sample data to initialize the " E923 = ("It looks like there is no proper sample data to initialize the "
"Model of component '{name}'. " "Model of component '{name}'. This is likely a bug in spaCy, so "
"This is likely a bug in spaCy, so feel free to open an issue.") "feel free to open an issue: https://github.com/explosion/spaCy/issues")
E924 = ("The '{name}' component does not seem to be initialized properly. " E924 = ("The '{name}' component does not seem to be initialized properly. "
"This is likely a bug in spaCy, so feel free to open an issue.") "This is likely a bug in spaCy, so feel free to open an issue: "
"https://github.com/explosion/spaCy/issues")
E925 = ("Invalid color values for displaCy visualizer: expected dictionary " E925 = ("Invalid color values for displaCy visualizer: expected dictionary "
"mapping label names to colors but got: {obj}") "mapping label names to colors but got: {obj}")
E926 = ("It looks like you're trying to modify nlp.{attr} directly. This " E926 = ("It looks like you're trying to modify `nlp.{attr}` directly. This "
"doesn't work because it's an immutable computed property. If you " "doesn't work because it's an immutable computed property. If you "
"need to modify the pipeline, use the built-in methods like " "need to modify the pipeline, use the built-in methods like "
"nlp.add_pipe, nlp.remove_pipe, nlp.disable_pipe or nlp.enable_pipe " "`nlp.add_pipe`, `nlp.remove_pipe`, `nlp.disable_pipe` or "
"instead.") "`nlp.enable_pipe` instead.")
E927 = ("Can't write to frozen list Maybe you're trying to modify a computed " E927 = ("Can't write to frozen list Maybe you're trying to modify a computed "
"property or default function argument?") "property or default function argument?")
E928 = ("A 'KnowledgeBase' can only be serialized to/from from a directory, " E928 = ("A KnowledgeBase can only be serialized to/from from a directory, "
"but the provided argument {loc} points to a file.") "but the provided argument {loc} points to a file.")
E929 = ("A 'KnowledgeBase' could not be read from {loc} - the path does " E929 = ("Couldn't read KnowledgeBase from {loc}. The path does not seem to exist.")
"not seem to exist.") E930 = ("Received invalid get_examples callback in `{name}.initialize`. "
E930 = ("Received invalid get_examples callback in {name}.initialize. "
"Expected function that returns an iterable of Example objects but " "Expected function that returns an iterable of Example objects but "
"got: {obj}") "got: {obj}")
E931 = ("Encountered Pipe subclass without Pipe.{method} method in component " E931 = ("Encountered Pipe subclass without `Pipe.{method}` method in component "
"'{name}'. If the component is trainable and you want to use this " "'{name}'. If the component is trainable and you want to use this "
"method, make sure it's overwritten on the subclass. If your " "method, make sure it's overwritten on the subclass. If your "
"component isn't trainable, add a method that does nothing or " "component isn't trainable, add a method that does nothing or "
@ -544,21 +534,21 @@ class Errors:
"models, see the models directory: https://spacy.io/models. If you " "models, see the models directory: https://spacy.io/models. If you "
"want to create a blank model, use spacy.blank: " "want to create a blank model, use spacy.blank: "
"nlp = spacy.blank(\"{name}\")") "nlp = spacy.blank(\"{name}\")")
E942 = ("Executing after_{name} callback failed. Expected the function to " E942 = ("Executing `after_{name}` callback failed. Expected the function to "
"return an initialized nlp object but got: {value}. Maybe " "return an initialized nlp object but got: {value}. Maybe "
"you forgot to return the modified object in your function?") "you forgot to return the modified object in your function?")
E943 = ("Executing before_creation callback failed. Expected the function to " E943 = ("Executing `before_creation` callback failed. Expected the function to "
"return an uninitialized Language subclass but got: {value}. Maybe " "return an uninitialized Language subclass but got: {value}. Maybe "
"you forgot to return the modified object in your function or " "you forgot to return the modified object in your function or "
"returned the initialized nlp object instead?") "returned the initialized nlp object instead?")
E944 = ("Can't copy pipeline component '{name}' from source model '{model}': " E944 = ("Can't copy pipeline component '{name}' from source '{model}': "
"not found in pipeline. Available components: {opts}") "not found in pipeline. Available components: {opts}")
E945 = ("Can't copy pipeline component '{name}' from source. Expected loaded " E945 = ("Can't copy pipeline component '{name}' from source. Expected loaded "
"nlp object, but got: {source}") "nlp object, but got: {source}")
E947 = ("Matcher.add received invalid 'greedy' argument: expected " E947 = ("`Matcher.add` received invalid `greedy` argument: expected "
"a string value from {expected} but got: '{arg}'") "a string value from {expected} but got: '{arg}'")
E948 = ("Matcher.add received invalid 'patterns' argument: expected " E948 = ("`Matcher.add` received invalid 'patterns' argument: expected "
"a List, but got: {arg_type}") "a list, but got: {arg_type}")
E949 = ("Can only create an alignment when the texts are the same.") E949 = ("Can only create an alignment when the texts are the same.")
E952 = ("The section '{name}' is not a valid section in the provided config.") E952 = ("The section '{name}' is not a valid section in the provided config.")
E953 = ("Mismatched IDs received by the Tok2Vec listener: {id1} vs. {id2}") E953 = ("Mismatched IDs received by the Tok2Vec listener: {id1} vs. {id2}")
@ -570,9 +560,9 @@ class Errors:
"for your language.") "for your language.")
E956 = ("Can't find component '{name}' in [components] block in the config. " E956 = ("Can't find component '{name}' in [components] block in the config. "
"Available components: {opts}") "Available components: {opts}")
E957 = ("Writing directly to Language.factories isn't needed anymore in " E957 = ("Writing directly to `Language.factories` isn't needed anymore in "
"spaCy v3. Instead, you can use the @Language.factory decorator " "spaCy v3. Instead, you can use the `@Language.factory` decorator "
"to register your custom component factory or @Language.component " "to register your custom component factory or `@Language.component` "
"to register a simple stateless function component that just takes " "to register a simple stateless function component that just takes "
"a Doc and returns it.") "a Doc and returns it.")
E958 = ("Language code defined in config ({bad_lang_code}) does not match " E958 = ("Language code defined in config ({bad_lang_code}) does not match "
@ -590,99 +580,93 @@ class Errors:
"component.\n\n{config}") "component.\n\n{config}")
E962 = ("Received incorrect {style} for pipe '{name}'. Expected dict, " E962 = ("Received incorrect {style} for pipe '{name}'. Expected dict, "
"got: {cfg_type}.") "got: {cfg_type}.")
E963 = ("Can't read component info from @Language.{decorator} decorator. " E963 = ("Can't read component info from `@Language.{decorator}` decorator. "
"Maybe you forgot to call it? Make sure you're using " "Maybe you forgot to call it? Make sure you're using "
"@Language.{decorator}() instead of @Language.{decorator}.") "`@Language.{decorator}()` instead of `@Language.{decorator}`.")
E964 = ("The pipeline component factory for '{name}' needs to have the " E964 = ("The pipeline component factory for '{name}' needs to have the "
"following named arguments, which are passed in by spaCy:\n- nlp: " "following named arguments, which are passed in by spaCy:\n- nlp: "
"receives the current nlp object and lets you access the vocab\n- " "receives the current nlp object and lets you access the vocab\n- "
"name: the name of the component instance, can be used to identify " "name: the name of the component instance, can be used to identify "
"the component, output losses etc.") "the component, output losses etc.")
E965 = ("It looks like you're using the @Language.component decorator to " E965 = ("It looks like you're using the `@Language.component` decorator to "
"register '{name}' on a class instead of a function component. If " "register '{name}' on a class instead of a function component. If "
"you need to register a class or function that *returns* a component " "you need to register a class or function that *returns* a component "
"function, use the @Language.factory decorator instead.") "function, use the `@Language.factory` decorator instead.")
E966 = ("nlp.add_pipe now takes the string name of the registered component " E966 = ("`nlp.add_pipe` now takes the string name of the registered component "
"factory, not a callable component. Expected string, but got " "factory, not a callable component. Expected string, but got "
"{component} (name: '{name}').\n\n- If you created your component " "{component} (name: '{name}').\n\n- If you created your component "
"with nlp.create_pipe('name'): remove nlp.create_pipe and call " "with `nlp.create_pipe('name')`: remove nlp.create_pipe and call "
"nlp.add_pipe('name') instead.\n\n- If you passed in a component " "`nlp.add_pipe('name')` instead.\n\n- If you passed in a component "
"like TextCategorizer(): call nlp.add_pipe with the string name " "like `TextCategorizer()`: call `nlp.add_pipe` with the string name "
"instead, e.g. nlp.add_pipe('textcat').\n\n- If you're using a custom " "instead, e.g. `nlp.add_pipe('textcat')`.\n\n- If you're using a custom "
"component: Add the decorator @Language.component (for function " "component: Add the decorator `@Language.component` (for function "
"components) or @Language.factory (for class components / factories) " "components) or `@Language.factory` (for class components / factories) "
"to your custom component and assign it a name, e.g. " "to your custom component and assign it a name, e.g. "
"@Language.component('your_name'). You can then run " "`@Language.component('your_name')`. You can then run "
"nlp.add_pipe('your_name') to add it to the pipeline.") "`nlp.add_pipe('your_name')` to add it to the pipeline.")
E967 = ("No {meta} meta information found for '{name}'. This is likely a bug in spaCy.") E967 = ("No {meta} meta information found for '{name}'. This is likely a bug in spaCy.")
E968 = ("nlp.replace_pipe now takes the string name of the registered component " E968 = ("`nlp.replace_pipe` now takes the string name of the registered component "
"factory, not a callable component. Expected string, but got " "factory, not a callable component. Expected string, but got "
"{component}.\n\n- If you created your component with" "{component}.\n\n- If you created your component with"
"with nlp.create_pipe('name'): remove nlp.create_pipe and call " "with `nlp.create_pipe('name')`: remove `nlp.create_pipe` and call "
"nlp.replace_pipe('{name}', 'name') instead.\n\n- If you passed in a " "`nlp.replace_pipe('{name}', 'name')` instead.\n\n- If you passed in a "
"component like TextCategorizer(): call nlp.replace_pipe with the " "component like `TextCategorizer()`: call `nlp.replace_pipe` with the "
"string name instead, e.g. nlp.replace_pipe('{name}', 'textcat').\n\n" "string name instead, e.g. `nlp.replace_pipe('{name}', 'textcat')`.\n\n"
"- If you're using a custom component: Add the decorator " "- If you're using a custom component: Add the decorator "
"@Language.component (for function components) or @Language.factory " "`@Language.component` (for function components) or `@Language.factory` "
"(for class components / factories) to your custom component and " "(for class components / factories) to your custom component and "
"assign it a name, e.g. @Language.component('your_name'). You can " "assign it a name, e.g. `@Language.component('your_name')`. You can "
"then run nlp.replace_pipe('{name}', 'your_name').") "then run `nlp.replace_pipe('{name}', 'your_name')`.")
E969 = ("Expected string values for field '{field}', but received {types} instead. ") E969 = ("Expected string values for field '{field}', but received {types} instead. ")
E970 = ("Can not execute command '{str_command}'. Do you have '{tool}' installed?") E970 = ("Can not execute command '{str_command}'. Do you have '{tool}' installed?")
E971 = ("Found incompatible lengths in Doc.from_array: {array_length} for the " E971 = ("Found incompatible lengths in `Doc.from_array`: {array_length} for the "
"array and {doc_length} for the Doc itself.") "array and {doc_length} for the Doc itself.")
E972 = ("Example.__init__ got None for '{arg}'. Requires Doc.") E972 = ("`Example.__init__` got None for '{arg}'. Requires Doc.")
E973 = ("Unexpected type for NER data") E973 = ("Unexpected type for NER data")
E974 = ("Unknown {obj} attribute: {key}") E974 = ("Unknown {obj} attribute: {key}")
E976 = ("The method 'Example.from_dict' expects a {type} as {n} argument, " E976 = ("The method `Example.from_dict` expects a {type} as {n} argument, "
"but received None.") "but received None.")
E977 = ("Can not compare a MorphAnalysis with a string object. " E977 = ("Can not compare a MorphAnalysis with a string object. "
"This is likely a bug in spaCy, so feel free to open an issue.") "This is likely a bug in spaCy, so feel free to open an issue: "
"https://github.com/explosion/spaCy/issues")
E978 = ("The {name} method takes a list of Example objects, but got: {types}") E978 = ("The {name} method takes a list of Example objects, but got: {types}")
E979 = ("Cannot convert {type} to an Example object.")
E980 = ("Each link annotation should refer to a dictionary with at most one " E980 = ("Each link annotation should refer to a dictionary with at most one "
"identifier mapping to 1.0, and all others to 0.0.") "identifier mapping to 1.0, and all others to 0.0.")
E981 = ("The offsets of the annotations for 'links' could not be aligned " E981 = ("The offsets of the annotations for `links` could not be aligned "
"to token boundaries.") "to token boundaries.")
E982 = ("The 'ent_iob' attribute of a Token should be an integer indexing " E982 = ("The `Token.ent_iob` attribute should be an integer indexing "
"into {values}, but found {value}.") "into {values}, but found {value}.")
E983 = ("Invalid key for '{dict}': {key}. Available keys: " E983 = ("Invalid key for '{dict}': {key}. Available keys: "
"{keys}") "{keys}")
E984 = ("Invalid component config for '{name}': component block needs either " E984 = ("Invalid component config for '{name}': component block needs either "
"a key 'factory' specifying the registered function used to " "a key `factory` specifying the registered function used to "
"initialize the component, or a key 'source' key specifying a " "initialize the component, or a key `source` key specifying a "
"spaCy model to copy the component from. For example, factory = " "spaCy model to copy the component from. For example, `factory = "
"\"ner\" will use the 'ner' factory and all other settings in the " "\"ner\"` will use the 'ner' factory and all other settings in the "
"block will be passed to it as arguments. Alternatively, source = " "block will be passed to it as arguments. Alternatively, `source = "
"\"en_core_web_sm\" will copy the component from that model.\n\n{config}") "\"en_core_web_sm\"` will copy the component from that model.\n\n{config}")
E985 = ("Can't load model from config file: no 'nlp' section found.\n\n{config}") E985 = ("Can't load model from config file: no [nlp] section found.\n\n{config}")
E986 = ("Could not create any training batches: check your input. " E986 = ("Could not create any training batches: check your input. "
"Are the train and dev paths defined? " "Are the train and dev paths defined? Is `discard_oversize` set appropriately? ")
"Is 'discard_oversize' set appropriately? ") E989 = ("`nlp.update()` was called with two positional arguments. This "
E987 = ("The text of an example training instance is either a Doc or "
"a string, but found {type} instead.")
E988 = ("Could not parse any training examples. Ensure the data is "
"formatted correctly.")
E989 = ("'nlp.update()' was called with two positional arguments. This "
"may be due to a backwards-incompatible change to the format " "may be due to a backwards-incompatible change to the format "
"of the training data in spaCy 3.0 onwards. The 'update' " "of the training data in spaCy 3.0 onwards. The 'update' "
"function should now be called with a batch of 'Example' " "function should now be called with a batch of Example "
"objects, instead of (text, annotation) tuples. ") "objects, instead of `(text, annotation)` tuples. ")
E991 = ("The function 'select_pipes' should be called with either a " E991 = ("The function `nlp.select_pipes` should be called with either a "
"'disable' argument to list the names of the pipe components " "`disable` argument to list the names of the pipe components "
"that should be disabled, or with an 'enable' argument that " "that should be disabled, or with an 'enable' argument that "
"specifies which pipes should not be disabled.") "specifies which pipes should not be disabled.")
E992 = ("The function `select_pipes` was called with `enable`={enable} " E992 = ("The function `select_pipes` was called with `enable`={enable} "
"and `disable`={disable} but that information is conflicting " "and `disable`={disable} but that information is conflicting "
"for the `nlp` pipeline with components {names}.") "for the `nlp` pipeline with components {names}.")
E993 = ("The config for 'nlp' needs to include a key 'lang' specifying " E993 = ("The config for the nlp object needs to include a key `lang` specifying "
"the code of the language to initialize it with (for example " "the code of the language to initialize it with (for example "
"'en' for English) - this can't be 'None'.\n\n{config}") "'en' for English) - this can't be None.\n\n{config}")
E996 = ("Could not parse {file}: {msg}")
E997 = ("Tokenizer special cases are not allowed to modify the text. " E997 = ("Tokenizer special cases are not allowed to modify the text. "
"This would map '{chunk}' to '{orth}' given token attributes " "This would map '{chunk}' to '{orth}' given token attributes "
"'{token_attrs}'.") "'{token_attrs}'.")
E999 = ("Unable to merge the `Doc` objects because they do not all share " E999 = ("Unable to merge the Doc objects because they do not all share "
"the same `Vocab`.") "the same `Vocab`.")
E1000 = ("The Chinese word segmenter is pkuseg but no pkuseg model was " E1000 = ("The Chinese word segmenter is pkuseg but no pkuseg model was "
"loaded. Provide the name of a pretrained model or the path to " "loaded. Provide the name of a pretrained model or the path to "
@ -694,35 +678,24 @@ class Errors:
E1003 = ("Unsupported lemmatizer mode '{mode}'.") E1003 = ("Unsupported lemmatizer mode '{mode}'.")
E1004 = ("Missing lemmatizer table(s) found for lemmatizer mode '{mode}'. " E1004 = ("Missing lemmatizer table(s) found for lemmatizer mode '{mode}'. "
"Required tables: {tables}. Found: {found}. Maybe you forgot to " "Required tables: {tables}. Found: {found}. Maybe you forgot to "
"call nlp.initialize() to load in the data?") "call `nlp.initialize()` to load in the data?")
E1005 = ("Unable to set attribute '{attr}' in tokenizer exception for " E1005 = ("Unable to set attribute '{attr}' in tokenizer exception for "
"'{chunk}'. Tokenizer exceptions are only allowed to specify " "'{chunk}'. Tokenizer exceptions are only allowed to specify "
"`ORTH` and `NORM`.") "ORTH and NORM.")
E1006 = ("Unable to initialize {name} model with 0 labels.")
E1007 = ("Unsupported DependencyMatcher operator '{op}'.") E1007 = ("Unsupported DependencyMatcher operator '{op}'.")
E1008 = ("Invalid pattern: each pattern should be a list of dicts. Check " E1008 = ("Invalid pattern: each pattern should be a list of dicts. Check "
"that you are providing a list of patterns as `List[List[dict]]`.") "that you are providing a list of patterns as `List[List[dict]]`.")
E1009 = ("String for hash '{val}' not found in StringStore. Set the value "
"through token.morph_ instead or add the string to the "
"StringStore with `nlp.vocab.strings.add(string)`.")
E1010 = ("Unable to set entity information for token {i} which is included " E1010 = ("Unable to set entity information for token {i} which is included "
"in more than one span in entities, blocked, missing or outside.") "in more than one span in entities, blocked, missing or outside.")
E1011 = ("Unsupported default '{default}' in doc.set_ents. Available " E1011 = ("Unsupported default '{default}' in `doc.set_ents`. Available "
"options: {modes}") "options: {modes}")
E1012 = ("Entity spans and blocked/missing/outside spans should be " E1012 = ("Entity spans and blocked/missing/outside spans should be "
"provided to doc.set_ents as lists of `Span` objects.") "provided to `doc.set_ents` as lists of Span objects.")
E1013 = ("Invalid morph: the MorphAnalysis must have the same vocab as the " E1013 = ("Invalid morph: the MorphAnalysis must have the same vocab as the "
"token itself. To set the morph from this MorphAnalysis, set from " "token itself. To set the morph from this MorphAnalysis, set from "
"the string value with: `token.set_morph(str(other_morph))`.") "the string value with: `token.set_morph(str(other_morph))`.")
@add_codes
class TempErrors:
T003 = ("Resizing pretrained Tagger models is not currently supported.")
T007 = ("Can't yet set {attr} from Span. Vote for this feature on the "
"issue tracker: http://github.com/explosion/spaCy/issues")
# Deprecated model shortcuts, only used in errors and warnings # Deprecated model shortcuts, only used in errors and warnings
OLD_MODEL_SHORTCUTS = { OLD_MODEL_SHORTCUTS = {
"en": "en_core_web_sm", "de": "de_core_news_sm", "es": "es_core_news_sm", "en": "en_core_web_sm", "de": "de_core_news_sm", "es": "es_core_news_sm",

View File

@ -6,6 +6,7 @@ from thinc.api import expand_window, residual, Maxout, Mish, PyTorchLSTM
from ...tokens import Doc from ...tokens import Doc
from ...util import registry from ...util import registry
from ...errors import Errors
from ...ml import _character_embed from ...ml import _character_embed
from ..staticvectors import StaticVectors from ..staticvectors import StaticVectors
from ..featureextractor import FeatureExtractor from ..featureextractor import FeatureExtractor
@ -201,7 +202,7 @@ def CharacterEmbed(
""" """
feature = intify_attr(feature) feature = intify_attr(feature)
if feature is None: if feature is None:
raise ValueError("Invalid feature: Must be a token attribute.") raise ValueError(Errors.E911(feat=feature))
if also_use_static_vectors: if also_use_static_vectors:
model = chain( model = chain(
concatenate( concatenate(

View File

@ -1,11 +1,11 @@
from typing import List, Tuple, Callable, Optional, cast from typing import List, Tuple, Callable, Optional, cast
from thinc.initializers import glorot_uniform_init from thinc.initializers import glorot_uniform_init
from thinc.util import partial from thinc.util import partial
from thinc.types import Ragged, Floats2d, Floats1d from thinc.types import Ragged, Floats2d, Floats1d
from thinc.api import Model, Ops, registry from thinc.api import Model, Ops, registry
from ..tokens import Doc from ..tokens import Doc
from ..errors import Errors
@registry.layers("spacy.StaticVectors.v1") @registry.layers("spacy.StaticVectors.v1")
@ -76,16 +76,9 @@ def init(
nO = Y.data.shape[1] nO = Y.data.shape[1]
if nM is None: if nM is None:
raise ValueError( raise ValueError(Errors.E905)
"Cannot initialize StaticVectors layer: nM dimension unset. "
"This dimension refers to the width of the vectors table."
)
if nO is None: if nO is None:
raise ValueError( raise ValueError(Errors.E904)
"Cannot initialize StaticVectors layer: nO dimension unset. "
"This dimension refers to the output width, after the linear "
"projection has been applied."
)
model.set_dim("nM", nM) model.set_dim("nM", nM)
model.set_dim("nO", nO) model.set_dim("nO", nO)
model.set_param("W", init_W(model.ops, (nO, nM))) model.set_param("W", init_W(model.ops, (nO, nM)))

View File

@ -9,10 +9,11 @@ from ...strings cimport hash_string
from ...structs cimport TokenC from ...structs cimport TokenC
from ...tokens.doc cimport Doc, set_children_from_heads from ...tokens.doc cimport Doc, set_children_from_heads
from ...training.example cimport Example from ...training.example cimport Example
from ...errors import Errors
from .stateclass cimport StateClass from .stateclass cimport StateClass
from ._state cimport StateC from ._state cimport StateC
from ...errors import Errors
# Calculate cost as gold/not gold. We don't use scalar value anyway. # Calculate cost as gold/not gold. We don't use scalar value anyway.
cdef int BINARY_COSTS = 1 cdef int BINARY_COSTS = 1
cdef weight_t MIN_SCORE = -90000 cdef weight_t MIN_SCORE = -90000
@ -86,7 +87,7 @@ cdef GoldParseStateC create_gold_state(Pool mem, StateClass stcls,
SENT_START_UNKNOWN, SENT_START_UNKNOWN,
0 0
) )
elif is_sent_start is None: elif is_sent_start is None:
gs.state_bits[i] = set_state_flag( gs.state_bits[i] = set_state_flag(
gs.state_bits[i], gs.state_bits[i],
@ -109,7 +110,7 @@ cdef GoldParseStateC create_gold_state(Pool mem, StateClass stcls,
IS_SENT_START, IS_SENT_START,
0 0
) )
for i, (head, label) in enumerate(zip(heads, labels)): for i, (head, label) in enumerate(zip(heads, labels)):
if head is not None: if head is not None:
gs.heads[i] = head gs.heads[i] = head
@ -158,7 +159,7 @@ cdef void update_gold_state(GoldParseStateC* gs, StateClass stcls) nogil:
) )
gs.n_kids_in_stack[i] = 0 gs.n_kids_in_stack[i] = 0
gs.n_kids_in_buffer[i] = 0 gs.n_kids_in_buffer[i] = 0
for i in range(stcls.stack_depth()): for i in range(stcls.stack_depth()):
s_i = stcls.S(i) s_i = stcls.S(i)
if not is_head_unknown(gs, s_i): if not is_head_unknown(gs, s_i):
@ -403,7 +404,7 @@ cdef class RightArc:
return 0 return 0
sent_start = st._sent[st.B_(0).l_edge].sent_start sent_start = st._sent[st.B_(0).l_edge].sent_start
return sent_start != 1 and st.H(st.S(0)) != st.B(0) return sent_start != 1 and st.H(st.S(0)) != st.B(0)
@staticmethod @staticmethod
cdef int transition(StateC* st, attr_t label) nogil: cdef int transition(StateC* st, attr_t label) nogil:
st.add_arc(st.S(0), st.B(0), label) st.add_arc(st.S(0), st.B(0), label)
@ -701,10 +702,10 @@ cdef class ArcEager(TransitionSystem):
output[i] = self.c[i].is_valid(st, self.c[i].label) output[i] = self.c[i].is_valid(st, self.c[i].label)
else: else:
output[i] = is_valid[self.c[i].move] output[i] = is_valid[self.c[i].move]
def get_cost(self, StateClass stcls, gold, int i): def get_cost(self, StateClass stcls, gold, int i):
if not isinstance(gold, ArcEagerGold): if not isinstance(gold, ArcEagerGold):
raise TypeError("Expected ArcEagerGold") raise TypeError(Errors.E909.format(name="ArcEagerGold"))
cdef ArcEagerGold gold_ = gold cdef ArcEagerGold gold_ = gold
gold_state = gold_.c gold_state = gold_.c
n_gold = 0 n_gold = 0
@ -717,7 +718,7 @@ cdef class ArcEager(TransitionSystem):
cdef int set_costs(self, int* is_valid, weight_t* costs, cdef int set_costs(self, int* is_valid, weight_t* costs,
StateClass stcls, gold) except -1: StateClass stcls, gold) except -1:
if not isinstance(gold, ArcEagerGold): if not isinstance(gold, ArcEagerGold):
raise TypeError("Expected ArcEagerGold") raise TypeError(Errors.E909.format(name="ArcEagerGold"))
cdef ArcEagerGold gold_ = gold cdef ArcEagerGold gold_ = gold
gold_.update(stcls) gold_.update(stcls)
gold_state = gold_.c gold_state = gold_.c

View File

@ -1,16 +1,18 @@
from collections import Counter
from libc.stdint cimport int32_t from libc.stdint cimport int32_t
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
from collections import Counter
from ...typedefs cimport weight_t, attr_t from ...typedefs cimport weight_t, attr_t
from ...lexeme cimport Lexeme from ...lexeme cimport Lexeme
from ...attrs cimport IS_SPACE from ...attrs cimport IS_SPACE
from ...training.example cimport Example from ...training.example cimport Example
from ...errors import Errors
from .stateclass cimport StateClass from .stateclass cimport StateClass
from ._state cimport StateC from ._state cimport StateC
from .transition_system cimport Transition, do_func_t from .transition_system cimport Transition, do_func_t
from ...errors import Errors
cdef enum: cdef enum:
MISSING MISSING
@ -248,7 +250,7 @@ cdef class BiluoPushDown(TransitionSystem):
def get_cost(self, StateClass stcls, gold, int i): def get_cost(self, StateClass stcls, gold, int i):
if not isinstance(gold, BiluoGold): if not isinstance(gold, BiluoGold):
raise TypeError("Expected BiluoGold") raise TypeError(Errors.E909.format(name="BiluoGold"))
cdef BiluoGold gold_ = gold cdef BiluoGold gold_ = gold
gold_state = gold_.c gold_state = gold_.c
n_gold = 0 n_gold = 0
@ -261,7 +263,7 @@ cdef class BiluoPushDown(TransitionSystem):
cdef int set_costs(self, int* is_valid, weight_t* costs, cdef int set_costs(self, int* is_valid, weight_t* costs,
StateClass stcls, gold) except -1: StateClass stcls, gold) except -1:
if not isinstance(gold, BiluoGold): if not isinstance(gold, BiluoGold):
raise TypeError("Expected BiluoGold") raise TypeError(Errors.E909.format(name="BiluoGold"))
cdef BiluoGold gold_ = gold cdef BiluoGold gold_ = gold
gold_.update(stcls) gold_.update(stcls)
gold_state = gold_.c gold_state = gold_.c

View File

@ -238,7 +238,7 @@ class Morphologizer(Tagger):
truths.append(eg_truths) truths.append(eg_truths)
d_scores, loss = loss_func(scores, truths) d_scores, loss = loss_func(scores, truths)
if self.model.ops.xp.isnan(loss): if self.model.ops.xp.isnan(loss):
raise ValueError("nan value when computing loss") raise ValueError(Errors.E910.format(name=self.name))
return float(loss), d_scores return float(loss), d_scores
def score(self, examples, **kwargs): def score(self, examples, **kwargs):

View File

@ -125,7 +125,7 @@ class SentenceRecognizer(Tagger):
truths.append(eg_truth) truths.append(eg_truth)
d_scores, loss = loss_func(scores, truths) d_scores, loss = loss_func(scores, truths)
if self.model.ops.xp.isnan(loss): if self.model.ops.xp.isnan(loss):
raise ValueError("nan value when computing loss") raise ValueError(Errors.E910.format(name=self.name))
return float(loss), d_scores return float(loss), d_scores
def initialize(self, get_examples, *, nlp=None): def initialize(self, get_examples, *, nlp=None):

View File

@ -15,7 +15,7 @@ from .pipe import Pipe, deserialize_config
from ..language import Language from ..language import Language
from ..attrs import POS, ID from ..attrs import POS, ID
from ..parts_of_speech import X from ..parts_of_speech import X
from ..errors import Errors, TempErrors, Warnings from ..errors import Errors, Warnings
from ..scorer import Scorer from ..scorer import Scorer
from ..training import validate_examples from ..training import validate_examples
from .. import util from .. import util
@ -258,7 +258,7 @@ class Tagger(Pipe):
truths = [eg.get_aligned("TAG", as_string=True) for eg in examples] truths = [eg.get_aligned("TAG", as_string=True) for eg in examples]
d_scores, loss = loss_func(scores, truths) d_scores, loss = loss_func(scores, truths)
if self.model.ops.xp.isnan(loss): if self.model.ops.xp.isnan(loss):
raise ValueError("nan value when computing loss") raise ValueError(Errors.E910.format(name=self.name))
return float(loss), d_scores return float(loss), d_scores
def initialize(self, get_examples, *, nlp=None, labels=None): def initialize(self, get_examples, *, nlp=None, labels=None):

View File

@ -905,7 +905,7 @@ def _auc(x, y):
if np.all(dx <= 0): if np.all(dx <= 0):
direction = -1 direction = -1
else: else:
raise ValueError(Errors.E164.format(x)) raise ValueError(Errors.E164.format(x=x))
area = direction * np.trapz(y, x) area = direction * np.trapz(y, x)
if isinstance(area, np.memmap): if isinstance(area, np.memmap):

View File

@ -245,7 +245,7 @@ cdef class Doc:
self.noun_chunks_iterator = self.vocab.get_noun_chunks self.noun_chunks_iterator = self.vocab.get_noun_chunks
cdef bint has_space cdef bint has_space
if words is None and spaces is not None: if words is None and spaces is not None:
raise ValueError("words must be set if spaces is set") raise ValueError(Errors.E908)
elif spaces is None and words is not None: elif spaces is None and words is not None:
self.has_unknown_spaces = True self.has_unknown_spaces = True
else: else:
@ -309,7 +309,7 @@ cdef class Doc:
else: else:
if len(ent) < 3 or ent[1] != "-": if len(ent) < 3 or ent[1] != "-":
raise ValueError(Errors.E177.format(tag=ent)) raise ValueError(Errors.E177.format(tag=ent))
ent_iob, ent_type = ent.split("-", 1) ent_iob, ent_type = ent.split("-", 1)
if ent_iob not in iob_strings: if ent_iob not in iob_strings:
raise ValueError(Errors.E177.format(tag=ent)) raise ValueError(Errors.E177.format(tag=ent))
ent_iob = iob_strings.index(ent_iob) ent_iob = iob_strings.index(ent_iob)

View File

@ -17,7 +17,7 @@ from ..lexeme cimport Lexeme
from ..symbols cimport dep from ..symbols cimport dep
from ..util import normalize_slice from ..util import normalize_slice
from ..errors import Errors, TempErrors, Warnings from ..errors import Errors, Warnings
from .underscore import Underscore, get_ext_args from .underscore import Underscore, get_ext_args
@ -652,7 +652,7 @@ cdef class Span:
return self.root.ent_id return self.root.ent_id
def __set__(self, hash_t key): def __set__(self, hash_t key):
raise NotImplementedError(TempErrors.T007.format(attr="ent_id")) raise NotImplementedError(Errors.E200.format(attr="ent_id"))
property ent_id_: property ent_id_:
"""RETURNS (str): The (string) entity ID.""" """RETURNS (str): The (string) entity ID."""
@ -660,7 +660,7 @@ cdef class Span:
return self.root.ent_id_ return self.root.ent_id_
def __set__(self, hash_t key): def __set__(self, hash_t key):
raise NotImplementedError(TempErrors.T007.format(attr="ent_id_")) raise NotImplementedError(Errors.E200.format(attr="ent_id_"))
@property @property
def orth_(self): def orth_(self):

View File

@ -3,7 +3,7 @@ from wasabi import Printer
from .. import tags_to_entities from .. import tags_to_entities
from ...training import iob_to_biluo from ...training import iob_to_biluo
from ...tokens import Doc, Span from ...tokens import Doc, Span
from ...util import load_model from ...errors import Errors
from ...util import load_model, get_lang_class from ...util import load_model, get_lang_class
@ -103,11 +103,7 @@ def conll_ner_to_docs(
lines = [line.strip() for line in conll_sent.split("\n") if line.strip()] lines = [line.strip() for line in conll_sent.split("\n") if line.strip()]
cols = list(zip(*[line.split() for line in lines])) cols = list(zip(*[line.split() for line in lines]))
if len(cols) < 2: if len(cols) < 2:
raise ValueError( raise ValueError(Errors.E093)
"The token-per-line NER file is not formatted correctly. "
"Try checking whitespace and delimiters. See "
"https://nightly.spacy.io/api/cli#convert"
)
length = len(cols[0]) length = len(cols[0])
words.extend(cols[0]) words.extend(cols[0])
sent_starts.extend([True] + [False] * (length - 1)) sent_starts.extend([True] + [False] * (length - 1))

View File

@ -4,6 +4,7 @@ from .conll_ner_to_docs import n_sents_info
from ...vocab import Vocab from ...vocab import Vocab
from ...training import iob_to_biluo, tags_to_entities from ...training import iob_to_biluo, tags_to_entities
from ...tokens import Doc, Span from ...tokens import Doc, Span
from ...errors import Errors
from ...util import minibatch from ...util import minibatch
@ -45,9 +46,7 @@ def read_iob(raw_sents, vocab, n_sents):
sent_words, sent_iob = zip(*sent_tokens) sent_words, sent_iob = zip(*sent_tokens)
sent_tags = ["-"] * len(sent_words) sent_tags = ["-"] * len(sent_words)
else: else:
raise ValueError( raise ValueError(Errors.E092)
"The sentence-per-line IOB/IOB2 file is not formatted correctly. Try checking whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert"
)
words.extend(sent_words) words.extend(sent_words)
tags.extend(sent_tags) tags.extend(sent_tags)
iob.extend(sent_iob) iob.extend(sent_iob)

View File

@ -16,6 +16,7 @@ from ..attrs import ID
from ..ml.models.multi_task import build_cloze_multi_task_model from ..ml.models.multi_task import build_cloze_multi_task_model
from ..ml.models.multi_task import build_cloze_characters_multi_task_model from ..ml.models.multi_task import build_cloze_characters_multi_task_model
from ..schemas import ConfigSchemaTraining, ConfigSchemaPretrain from ..schemas import ConfigSchemaTraining, ConfigSchemaPretrain
from ..errors import Errors
from ..util import registry, load_model_from_config, dot_to_object from ..util import registry, load_model_from_config, dot_to_object
@ -151,9 +152,9 @@ def create_objective(config: Config):
distance = L2Distance(normalize=True, ignore_zeros=True) distance = L2Distance(normalize=True, ignore_zeros=True)
return partial(get_vectors_loss, distance=distance) return partial(get_vectors_loss, distance=distance)
else: else:
raise ValueError("Unexpected loss type", config["loss"]) raise ValueError(Errors.E906.format(loss_type=config["loss"]))
else: else:
raise ValueError("Unexpected objective_type", objective_type) raise ValueError(Errors.E907.format(objective_type=objective_type))
def get_vectors_loss(ops, docs, prediction, distance): def get_vectors_loss(ops, docs, prediction, distance):