Fix base schema integration

This commit is contained in:
Ines Montani 2020-09-27 22:50:36 +02:00
parent e04bd16f7f
commit 658fad428a
2 changed files with 8 additions and 7 deletions

View File

@ -27,7 +27,7 @@ from .lang.punctuation import TOKENIZER_INFIXES
from .tokens import Doc
from .tokenizer import Tokenizer
from .errors import Errors, Warnings
from .schemas import ConfigSchema
from .schemas import ConfigSchema, ConfigSchemaNlp
from .git_info import GIT_VERSION
from . import util
from . import about
@ -1518,10 +1518,15 @@ class Language:
config = util.copy_config(config)
orig_pipeline = config.pop("components", {})
config["components"] = {}
if auto_fill:
filled = registry.fill(config, validate=validate, schema=ConfigSchema)
else:
filled = config
filled["components"] = orig_pipeline
config["components"] = orig_pipeline
resolved_nlp = registry.resolve(filled["nlp"], validate=validate)
resolved_nlp = registry.resolve(
filled["nlp"], validate=validate, schema=ConfigSchemaNlp
)
create_tokenizer = resolved_nlp["tokenizer"]
before_creation = resolved_nlp["before_creation"]
after_creation = resolved_nlp["after_creation"]

View File

@ -295,10 +295,6 @@ class ConfigSchema(BaseModel):
arbitrary_types_allowed = True
class NlpSchema(BaseModel):
nlp: ConfigSchemaNlp
class TrainingSchema(BaseModel):
training: ConfigSchemaTraining
pretraining: Union[ConfigSchemaPretrain, ConfigSchemaPretrainEmpty] = {}