mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
Make corpus paths default to None and improve errors
This commit is contained in:
parent
0250bcf6a3
commit
1aeef3bfbb
|
@ -4,8 +4,8 @@ can help generate the best possible configuration, given a user's requirements.
|
|||
{%- set use_transformer = (transformer_data and hardware != "cpu") -%}
|
||||
{%- set transformer = transformer_data[optimize] if use_transformer else {} -%}
|
||||
[paths]
|
||||
train = ""
|
||||
dev = ""
|
||||
train = null
|
||||
dev = null
|
||||
|
||||
[system]
|
||||
{% if use_transformer -%}
|
||||
|
|
|
@ -66,7 +66,7 @@ def init_pipeline(
|
|||
nlp.to_disk(init_path)
|
||||
msg.good(f"Saved initialized pipeline to {init_path}")
|
||||
else:
|
||||
nlp = util.load_model(init_path)
|
||||
nlp = util.load_model(init_path).from_config(config)
|
||||
if must_reinitialize(config, nlp.config):
|
||||
msg.warn("Config has changed: need to re-initialize pipeline")
|
||||
nlp = init_nlp(config, **init_kwargs)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[paths]
|
||||
train = ""
|
||||
dev = ""
|
||||
train = null
|
||||
dev = null
|
||||
vectors = null
|
||||
vocab_data = null
|
||||
init_tok2vec = null
|
||||
|
|
|
@ -477,6 +477,8 @@ class Errors:
|
|||
E201 = ("Span index out of range.")
|
||||
|
||||
# TODO: fix numbering after merging develop into master
|
||||
E913 = ("Corpus path can't be None. Maybe you forgot to define it in your "
|
||||
"config.cfg or override it on the CLI?")
|
||||
E914 = ("Executing {name} callback failed. Expected the function to "
|
||||
"return the nlp object but got: {value}. Maybe you forgot to return "
|
||||
"the modified object in your function?")
|
||||
|
|
|
@ -14,8 +14,8 @@ from ..util import make_tempdir
|
|||
|
||||
nlp_config_string = """
|
||||
[paths]
|
||||
train = ""
|
||||
dev = ""
|
||||
train = null
|
||||
dev = null
|
||||
|
||||
[corpora]
|
||||
|
||||
|
@ -309,7 +309,7 @@ def test_config_interpolation():
|
|||
config = Config().from_str(nlp_config_string, interpolate=False)
|
||||
assert config["corpora"]["train"]["path"] == "${paths.train}"
|
||||
interpolated = config.interpolate()
|
||||
assert interpolated["corpora"]["train"]["path"] == ""
|
||||
assert interpolated["corpora"]["train"]["path"] is None
|
||||
nlp = English.from_config(config)
|
||||
assert nlp.config["corpora"]["train"]["path"] == "${paths.train}"
|
||||
# Ensure that variables are preserved in nlp config
|
||||
|
@ -317,10 +317,10 @@ def test_config_interpolation():
|
|||
assert config["components"]["tagger"]["model"]["tok2vec"]["width"] == width
|
||||
assert nlp.config["components"]["tagger"]["model"]["tok2vec"]["width"] == width
|
||||
interpolated2 = nlp.config.interpolate()
|
||||
assert interpolated2["corpora"]["train"]["path"] == ""
|
||||
assert interpolated2["corpora"]["train"]["path"] is None
|
||||
assert interpolated2["components"]["tagger"]["model"]["tok2vec"]["width"] == 342
|
||||
nlp2 = English.from_config(interpolated)
|
||||
assert nlp2.config["corpora"]["train"]["path"] == ""
|
||||
assert nlp2.config["corpora"]["train"]["path"] is None
|
||||
assert nlp2.config["components"]["tagger"]["model"]["tok2vec"]["width"] == 342
|
||||
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ import srsly
|
|||
from .. import util
|
||||
from .augment import dont_augment
|
||||
from .example import Example
|
||||
from ..errors import Warnings
|
||||
from ..errors import Warnings, Errors
|
||||
from ..tokens import DocBin, Doc
|
||||
from ..vocab import Vocab
|
||||
|
||||
|
@ -20,12 +20,14 @@ FILE_TYPE = ".spacy"
|
|||
|
||||
@util.registry.readers("spacy.Corpus.v1")
|
||||
def create_docbin_reader(
|
||||
path: Path,
|
||||
path: Optional[Path],
|
||||
gold_preproc: bool,
|
||||
max_length: int = 0,
|
||||
limit: int = 0,
|
||||
augmenter: Optional[Callable] = None,
|
||||
) -> Callable[["Language"], Iterable[Example]]:
|
||||
if path is None:
|
||||
raise ValueError(Errors.E913)
|
||||
return Corpus(
|
||||
path,
|
||||
gold_preproc=gold_preproc,
|
||||
|
|
Loading…
Reference in New Issue
Block a user