Implement new Language methods and pipeline API

Commit 212c8f0711 (parent 3468d535ad)
Repository: https://github.com/explosion/spaCy.git

Changes to spacy/language.py:
```diff
@@ -70,59 +70,7 @@ class BaseDefaults(object):
                          prefix_search=prefix_search, suffix_search=suffix_search,
                          infix_finditer=infix_finditer, token_match=token_match)
 
-    @classmethod
-    def create_tagger(cls, nlp=None, **cfg):
-        if nlp is None:
-            return NeuralTagger(cls.create_vocab(nlp), **cfg)
-        else:
-            return NeuralTagger(nlp.vocab, **cfg)
-
-    @classmethod
-    def create_parser(cls, nlp=None, **cfg):
-        if nlp is None:
-            return NeuralDependencyParser(cls.create_vocab(nlp), **cfg)
-        else:
-            return NeuralDependencyParser(nlp.vocab, **cfg)
-
-    @classmethod
-    def create_entity(cls, nlp=None, **cfg):
-        if nlp is None:
-            return NeuralEntityRecognizer(cls.create_vocab(nlp), **cfg)
-        else:
-            return NeuralEntityRecognizer(nlp.vocab, **cfg)
-
-    @classmethod
-    def create_pipeline(cls, nlp=None, disable=tuple()):
-        meta = nlp.meta if nlp is not None else {}
-        # Resolve strings, like "cnn", "lstm", etc
-        pipeline = []
-        for entry in meta.get('pipeline', []):
-            if entry in disable or getattr(entry, 'name', entry) in disable:
-                continue
-            factory = cls.Defaults.factories[entry]
-            pipeline.append(factory(nlp, **meta.get(entry, {})))
-        return pipeline
-
-    factories = {
-        'make_doc': create_tokenizer,
-        'tensorizer': lambda nlp, **cfg: [TokenVectorEncoder(nlp.vocab, **cfg)],
-        'tagger': lambda nlp, **cfg: [NeuralTagger(nlp.vocab, **cfg)],
-        'parser': lambda nlp, **cfg: [
-            NeuralDependencyParser(nlp.vocab, **cfg),
-            nonproj.deprojectivize],
-        'ner': lambda nlp, **cfg: [NeuralEntityRecognizer(nlp.vocab, **cfg)],
-        'similarity': lambda nlp, **cfg: [SimilarityHook(nlp.vocab, **cfg)],
-        'textcat': lambda nlp, **cfg: [TextCategorizer(nlp.vocab, **cfg)],
-        # Temporary compatibility -- delete after pivot
-        'token_vectors': lambda nlp, **cfg: [TokenVectorEncoder(nlp.vocab, **cfg)],
-        'tags': lambda nlp, **cfg: [NeuralTagger(nlp.vocab, **cfg)],
-        'dependencies': lambda nlp, **cfg: [
-            NeuralDependencyParser(nlp.vocab, **cfg),
-            nonproj.deprojectivize,
-        ],
-        'entities': lambda nlp, **cfg: [NeuralEntityRecognizer(nlp.vocab, **cfg)],
-    }
-
+    pipe_names = ['tensorizer', 'tagger', 'parser', 'ner']
     token_match = TOKEN_MATCH
     prefixes = tuple(TOKENIZER_PREFIXES)
     suffixes = tuple(TOKENIZER_SUFFIXES)
```
```diff
@@ -152,8 +100,17 @@ class Language(object):
     Defaults = BaseDefaults
     lang = None
 
-    def __init__(self, vocab=True, make_doc=True, pipeline=None,
-                 meta={}, disable=tuple(), **kwargs):
+    factories = {
+        'tokenizer': lambda nlp: nlp.Defaults.create_tokenizer(nlp),
+        'tensorizer': lambda nlp, **cfg: TokenVectorEncoder(nlp.vocab, **cfg),
+        'tagger': lambda nlp, **cfg: NeuralTagger(nlp.vocab, **cfg),
+        'parser': lambda nlp, **cfg: NeuralDependencyParser(nlp.vocab, **cfg),  # nonproj.deprojectivize,
+        'ner': lambda nlp, **cfg: NeuralEntityRecognizer(nlp.vocab, **cfg),
+        'similarity': lambda nlp, **cfg: SimilarityHook(nlp.vocab, **cfg),
+        'textcat': lambda nlp, **cfg: TextCategorizer(nlp.vocab, **cfg)
+    }
+
+    def __init__(self, vocab=True, make_doc=True, meta={}, **kwargs):
         """Initialise a Language object.
 
         vocab (Vocab): A `Vocab` object. If `True`, a vocab is created via
```
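Taken together, the two hunks above split responsibilities: `BaseDefaults.pipe_names` now lists only the default component *names*, while `Language.factories` maps each name to a constructor that returns a single component rather than a list. A minimal, self-contained sketch of that name-to-factory resolution; the factory table below uses placeholder stand-ins, not spaCy's real components:

```python
# Stand-in factory table, mirroring the shape of Language.factories.
# The lambdas return placeholder strings instead of real components.
factories = {
    'tensorizer': lambda nlp, **cfg: 'tensorizer-component',
    'tagger': lambda nlp, **cfg: 'tagger-component',
}
pipe_names = ['tensorizer', 'tagger']  # mirrors BaseDefaults.pipe_names

nlp = None  # stands in for a Language instance
# Resolve each default name into a (name, component) tuple -- the
# structure the rest of this commit assumes for nlp.pipeline.
pipeline = [(name, factories[name](nlp)) for name in pipe_names]
assert [name for name, _ in pipeline] == pipe_names
```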
```diff
@@ -179,28 +136,7 @@ class Language(object):
             factory = self.Defaults.create_tokenizer
             make_doc = factory(self, **meta.get('tokenizer', {}))
         self.tokenizer = make_doc
-        if pipeline is True:
-            self.pipeline = self.Defaults.create_pipeline(self, disable)
-        elif pipeline:
-            # Careful not to do getattr(p, 'name', None) here
-            # If we had disable=[None], we'd disable everything!
-            self.pipeline = [p for p in pipeline
-                             if p not in disable
-                             and getattr(p, 'name', p) not in disable]
-            # Resolve strings, like "cnn", "lstm", etc
-            for i, entry in enumerate(self.pipeline):
-                if entry in self.Defaults.factories:
-                    factory = self.Defaults.factories[entry]
-                    self.pipeline[i] = factory(self, **meta.get(entry, {}))
-        else:
-            self.pipeline = []
-        flat_list = []
-        for pipe in self.pipeline:
-            if isinstance(pipe, list):
-                flat_list.extend(pipe)
-            else:
-                flat_list.append(pipe)
-        self.pipeline = flat_list
+        self.pipeline = []
         self._optimizer = None
 
     @property
```
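With the `pipeline` and `disable` arguments gone from `__init__`, a freshly constructed `Language` starts empty. A small sketch, assuming spaCy at this commit is importable:

```python
from spacy.language import Language

# Under the new __init__, no components are built implicitly: the
# pipeline starts as an empty list of (name, component) tuples.
nlp = Language()
assert nlp.pipeline == []
assert nlp.pipe_names == []
```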
```diff
@@ -214,11 +150,7 @@ class Language(object):
         self._meta.setdefault('email', '')
         self._meta.setdefault('url', '')
         self._meta.setdefault('license', '')
-        pipeline = []
-        for component in self.pipeline:
-            if hasattr(component, 'name'):
-                pipeline.append(component.name)
-        self._meta['pipeline'] = pipeline
+        self._meta['pipeline'] = self.pipe_names
         return self._meta
 
     @meta.setter
```
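`meta['pipeline']` is now derived directly from `pipe_names` instead of being rebuilt from per-component `name` attributes. A sketch using `add_pipe` (introduced in the next hunk) with a trivial stand-in component:

```python
from spacy.language import Language

def dummy(doc):
    # A no-op component: takes a Doc and returns it unchanged.
    return doc

nlp = Language()
nlp.add_pipe(dummy, name='dummy')
# The meta now simply mirrors the ordered component names.
assert nlp.meta['pipeline'] == nlp.pipe_names == ['dummy']
```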
```diff
@@ -228,31 +160,133 @@ class Language(object):
     # Conveniences to access pipeline components
     @property
     def tensorizer(self):
-        return self.get_component('tensorizer')
+        return self.get_pipe('tensorizer')
 
     @property
     def tagger(self):
-        return self.get_component('tagger')
+        return self.get_pipe('tagger')
 
     @property
     def parser(self):
-        return self.get_component('parser')
+        return self.get_pipe('parser')
 
     @property
     def entity(self):
-        return self.get_component('ner')
+        return self.get_pipe('ner')
 
     @property
     def matcher(self):
-        return self.get_component('matcher')
+        return self.get_pipe('matcher')
 
-    def get_component(self, name):
-        if self.pipeline in (True, None):
-            return None
-        for proc in self.pipeline:
-            if hasattr(proc, 'name') and proc.name.endswith(name):
-                return proc
-        return None
+    @property
+    def pipe_names(self):
+        """Get names of available pipeline components.
+
+        RETURNS (list): List of component name strings, in order.
+        """
+        return [pipe_name for pipe_name, _ in self.pipeline]
+
+    def get_pipe(self, name):
+        """Get a pipeline component for a given component name.
+
+        name (unicode): Name of pipeline component to get.
+        RETURNS (callable): The pipeline component.
+        """
+        for pipe_name, component in self.pipeline:
+            if pipe_name == name:
+                return component
+        msg = "No component '{}' found in pipeline. Available names: {}"
+        raise KeyError(msg.format(name, self.pipe_names))
+
+    def create_pipe(self, name, config=dict()):
+        """Create a pipeline component from a factory.
+
+        name (unicode): Factory name to look up in `Language.factories`.
+        RETURNS (callable): Pipeline component.
+        """
+        if name not in self.factories:
+            raise KeyError("Can't find factory for '{}'.".format(name))
+        factory = self.factories[name]
+        return factory(self, **config)
+
+    def add_pipe(self, component, name=None, before=None, after=None,
+                 first=None, last=None):
+        """Add a component to the processing pipeline. Valid components are
+        callables that take a `Doc` object, modify it and return it. Only
+        one of before, after, first or last can be set. Default behaviour
+        is "last".
+
+        component (callable): The pipeline component.
+        name (unicode): Name of pipeline component. Overwrites existing
+            component.name attribute if available. If no name is set and
+            the component exposes no name attribute, component.__name__ is
+            used. An error is raised if the name already exists in the
+            pipeline.
+        before (unicode): Component name to insert component directly before.
+        after (unicode): Component name to insert component directly after.
+        first (bool): Insert component first / not first in the pipeline.
+        last (bool): Insert component last / not last in the pipeline.
+
+        EXAMPLE:
+            >>> nlp.add_pipe(component, before='ner')
+            >>> nlp.add_pipe(component, name='custom_name', last=True)
+        """
+        if name is None:
+            name = getattr(component, 'name', component.__name__)
+        if name in self.pipe_names:
+            raise ValueError("'{}' already exists in pipeline.".format(name))
+        if sum([bool(before), bool(after), bool(first), bool(last)]) >= 2:
+            msg = ("Invalid constraints. You can only set one of the "
+                   "following: before, after, first, last.")
+            raise ValueError(msg)
+        pipe = (name, component)
+        if last or not any([first, before, after]):
+            self.pipeline.append(pipe)
+        elif first:
+            self.pipeline.insert(0, pipe)
+        elif before and before in self.pipe_names:
+            self.pipeline.insert(self.pipe_names.index(before), pipe)
+        elif after and after in self.pipe_names:
+            self.pipeline.insert(self.pipe_names.index(after) + 1, pipe)
+        else:
+            msg = "Can't find '{}' in pipeline. Available names: {}"
+            unfound = before or after
+            raise ValueError(msg.format(unfound, self.pipe_names))
+
+    def replace_pipe(self, name, component):
+        """Replace a component in the pipeline.
+
+        name (unicode): Name of the component to replace.
+        component (callable): Pipeline component.
+        """
+        if name not in self.pipe_names:
+            msg = "Can't find '{}' in pipeline. Available names: {}"
+            raise ValueError(msg.format(name, self.pipe_names))
+        self.pipeline[self.pipe_names.index(name)] = (name, component)
+
+    def rename_pipe(self, old_name, new_name):
+        """Rename a pipeline component.
+
+        old_name (unicode): Name of the component to rename.
+        new_name (unicode): New name of the component.
+        """
+        if old_name not in self.pipe_names:
+            msg = "Can't find '{}' in pipeline. Available names: {}"
+            raise ValueError(msg.format(old_name, self.pipe_names))
+        if new_name in self.pipe_names:
+            msg = "'{}' already exists in pipeline. Existing names: {}"
+            raise ValueError(msg.format(new_name, self.pipe_names))
+        i = self.pipe_names.index(old_name)
+        self.pipeline[i] = (new_name, self.pipeline[i][1])
+
+    def remove_pipe(self, name):
+        """Remove a component from the pipeline.
+
+        name (unicode): Name of the component to remove.
+        RETURNS (tuple): A (name, component) tuple of the removed component.
+        """
+        if name not in self.pipe_names:
+            msg = "Can't find '{}' in pipeline. Available names: {}"
+            raise ValueError(msg.format(name, self.pipe_names))
+        return self.pipeline.pop(self.pipe_names.index(name))
 
     def __call__(self, text, disable=[]):
         """Apply the pipeline to some text. The text can span multiple sentences,
```
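The pipeline is now an explicit, ordered list of `(name, component)` tuples, and the methods above cover its whole lifecycle. A usage sketch against this new API, using trivial stand-in components (any callable that takes a `Doc` and returns it qualifies):

```python
from spacy.language import Language

def print_info(doc):
    # A valid pipeline component: receives the Doc, returns the Doc.
    print('tokens:', len(doc))
    return doc

def noop(doc):
    return doc

nlp = Language()
nlp.add_pipe(print_info)                       # name defaults to __name__
nlp.add_pipe(noop, name='first_step', first=True)
assert nlp.pipe_names == ['first_step', 'print_info']

nlp.rename_pipe('first_step', 'step_one')
nlp.replace_pipe('step_one', noop)
removed_name, removed = nlp.remove_pipe('step_one')
assert removed_name == 'step_one'
assert nlp.get_pipe('print_info') is print_info

doc = nlp(u'This is a sentence.')              # runs the remaining component
```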
```diff
@@ -269,8 +303,7 @@ class Language(object):
         ('An', 'NN')
         """
         doc = self.make_doc(text)
-        for proc in self.pipeline:
-            name = getattr(proc, 'name', None)
+        for name, proc in self.pipeline:
             if name in disable:
                 continue
             doc = proc(doc)
```
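Because every component is registered under an explicit name, per-call disabling no longer depends on the component having a `name` attribute. A brief sketch:

```python
from spacy.language import Language

def loud(doc):
    print('loud component ran')
    return doc

nlp = Language()
nlp.add_pipe(loud, name='loud')
nlp(u'Hello world.')                    # prints: loud component ran
nlp(u'Hello world.', disable=['loud'])  # skipped via its registered name
```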
```diff
@@ -308,7 +341,7 @@ class Language(object):
                 grads[key] = (W, dW)
         pipes = list(self.pipeline)
         random.shuffle(pipes)
-        for proc in pipes:
+        for name, proc in pipes:
             if not hasattr(proc, 'update'):
                 continue
             proc.update(docs, golds, drop=drop, sgd=get_grads, losses=losses)
```
```diff
@@ -322,7 +355,7 @@ class Language(object):
         docs_golds (iterable): Tuples of `Doc` and `GoldParse` objects.
         YIELDS (tuple): Tuples of preprocessed `Doc` and `GoldParse` objects.
         """
-        for proc in self.pipeline:
+        for name, proc in self.pipeline:
             if hasattr(proc, 'preprocess_gold'):
                 docs_golds = proc.preprocess_gold(docs_golds)
         for doc, gold in docs_golds:
```
```diff
@@ -371,7 +404,7 @@ class Language(object):
         else:
             device = None
         link_vectors_to_models(self.vocab)
-        for proc in self.pipeline:
+        for name, proc in self.pipeline:
             if hasattr(proc, 'begin_training'):
                 context = proc.begin_training(get_gold_tuples(),
                                               pipeline=self.pipeline)
```
```diff
@@ -393,7 +426,7 @@ class Language(object):
         docs, golds = zip(*docs_golds)
         docs = list(docs)
         golds = list(golds)
-        for pipe in self.pipeline:
+        for name, pipe in self.pipeline:
             if not hasattr(pipe, 'pipe'):
                 for doc in docs:
                     pipe(doc)
```
```diff
@@ -419,7 +452,7 @@ class Language(object):
         >>> with nlp.use_params(optimizer.averages):
         >>>     nlp.to_disk('/tmp/checkpoint')
         """
-        contexts = [pipe.use_params(params) for pipe
+        contexts = [pipe.use_params(params) for name, pipe
                     in self.pipeline if hasattr(pipe, 'use_params')]
         # TODO: Having trouble with contextlib
         # Workaround: these aren't actually context managers atm.
```
```diff
@@ -466,8 +499,7 @@ class Language(object):
                     yield (doc, context)
             return
         docs = (self.make_doc(text) for text in texts)
-        for proc in self.pipeline:
-            name = getattr(proc, 'name', None)
+        for name, proc in self.pipeline:
             if name in disable:
                 continue
             if hasattr(proc, 'pipe'):
```
```diff
@@ -495,14 +527,14 @@ class Language(object):
             ('tokenizer', lambda p: self.tokenizer.to_disk(p, vocab=False)),
             ('meta.json', lambda p: p.open('w').write(json_dumps(self.meta)))
         ))
-        for proc in self.pipeline:
+        for name, proc in self.pipeline:
             if not hasattr(proc, 'name'):
                 continue
-            if proc.name in disable:
+            if name in disable:
                 continue
             if not hasattr(proc, 'to_disk'):
                 continue
-            serializers[proc.name] = lambda p, proc=proc: proc.to_disk(p, vocab=False)
+            serializers[name] = lambda p, proc=proc: proc.to_disk(p, vocab=False)
         serializers['vocab'] = lambda p: self.vocab.to_disk(p)
         util.to_disk(path, serializers, {p: False for p in disable})
```
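Serialization keys likewise come from the registered names now, not from `proc.name`. A sketch of saving while excluding a component by name; the path is illustrative:

```python
from spacy.language import Language

def stats(doc):
    return doc

nlp = Language()
nlp.add_pipe(stats, name='stats')
# 'stats' has no to_disk method, so the loop above would skip it anyway;
# passing disable=['stats'] excludes it by its registered name explicitly.
nlp.to_disk('/tmp/pipeline_demo', disable=['stats'])
```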
```diff
@@ -526,14 +558,12 @@ class Language(object):
             ('tokenizer', lambda p: self.tokenizer.from_disk(p, vocab=False)),
             ('meta.json', lambda p: p.open('w').write(json_dumps(self.meta)))
         ))
-        for proc in self.pipeline:
-            if not hasattr(proc, 'name'):
-                continue
-            if proc.name in disable:
+        for name, proc in self.pipeline:
+            if name in disable:
                 continue
             if not hasattr(proc, 'to_disk'):
                 continue
-            deserializers[proc.name] = lambda p, proc=proc: proc.from_disk(p, vocab=False)
+            deserializers[name] = lambda p, proc=proc: proc.from_disk(p, vocab=False)
         exclude = {p: False for p in disable}
         if not (path / 'vocab').exists():
             exclude['vocab'] = True
```
|
||||||
('tokenizer', lambda: self.tokenizer.to_bytes(vocab=False)),
|
('tokenizer', lambda: self.tokenizer.to_bytes(vocab=False)),
|
||||||
('meta', lambda: ujson.dumps(self.meta))
|
('meta', lambda: ujson.dumps(self.meta))
|
||||||
))
|
))
|
||||||
for i, proc in enumerate(self.pipeline):
|
for i, (name, proc) in enumerate(self.pipeline):
|
||||||
if getattr(proc, 'name', None) in disable:
|
if name in disable:
|
||||||
continue
|
continue
|
||||||
if not hasattr(proc, 'to_bytes'):
|
if not hasattr(proc, 'to_bytes'):
|
||||||
continue
|
continue
|
||||||
|
@ -572,8 +602,8 @@ class Language(object):
|
||||||
('tokenizer', lambda b: self.tokenizer.from_bytes(b, vocab=False)),
|
('tokenizer', lambda b: self.tokenizer.from_bytes(b, vocab=False)),
|
||||||
('meta', lambda b: self.meta.update(ujson.loads(b)))
|
('meta', lambda b: self.meta.update(ujson.loads(b)))
|
||||||
))
|
))
|
||||||
for i, proc in enumerate(self.pipeline):
|
for i, (name, proc) in enumerate(self.pipeline):
|
||||||
if getattr(proc, 'name', None) in disable:
|
if name in disable:
|
||||||
continue
|
continue
|
||||||
if not hasattr(proc, 'from_bytes'):
|
if not hasattr(proc, 'from_bytes'):
|
||||||
continue
|
continue
|
||||||
|
|
|
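`to_bytes` and `from_bytes` follow the same name-based pattern as the disk serializers. A minimal round-trip sketch, assuming a bare `Language` serializes cleanly at this commit:

```python
from spacy.language import Language

nlp = Language()
data = nlp.to_bytes()               # tokenizer, vocab and meta as one blob
nlp2 = Language().from_bytes(data)  # from_bytes returns the Language itself
assert nlp2.meta == nlp.meta
```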
Changes to spacy/util.py:

```diff
@@ -135,7 +135,11 @@ def load_model_from_path(model_path, meta=False, **overrides):
     if not meta:
         meta = get_model_meta(model_path)
     cls = get_lang_class(meta['lang'])
-    nlp = cls(pipeline=meta.get('pipeline', True), meta=meta, **overrides)
+    nlp = cls(meta=meta, **overrides)
+    for name in meta.get('pipeline', []):
+        config = meta.get('pipeline_args', {}).get(name, {})
+        component = nlp.create_pipe(name, config=config)
+        nlp.add_pipe(component, name=name)
     return nlp.from_disk(model_path)
```
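Model loading now reconstructs the pipeline from the meta, name by name, through `create_pipe` and `add_pipe`. A sketch of the meta shape this loop consumes; the `pipeline_args` values are illustrative, not taken from a real model package:

```python
# Hypothetical meta.json contents, shown as a Python dict.
meta = {
    'lang': 'en',
    'pipeline': ['tensorizer', 'tagger', 'parser', 'ner'],
    'pipeline_args': {'tagger': {}},  # optional per-component config
}

# The loop in load_model_from_path resolves each name via the factory
# table and registers the component under that same name:
for name in meta.get('pipeline', []):
    config = meta.get('pipeline_args', {}).get(name, {})
    print('would create', name, 'with config', config)
```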