Mirror of https://github.com/explosion/spaCy.git (synced 2024-12-24 17:06:29 +03:00)
Commit fb4eda6616: Merge branch 'master' of https://github.com/explosion/spaCy
spacy/about.py
@@ -3,13 +3,13 @@
 # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
 
 __title__ = 'spacy'
-__version__ = '2.0.11.dev0'
+__version__ = '2.0.11'
 __summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython'
 __uri__ = 'https://spacy.io'
 __author__ = 'Explosion AI'
 __email__ = 'contact@explosion.ai'
 __license__ = 'MIT'
-__release__ = False
+__release__ = True
 
 __download_url__ = 'https://github.com/explosion/spacy-models/releases/download'
 __compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json'
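For reference, the values changed above are what a 2.0.11 install reports at runtime. A minimal check, assuming spaCy 2.0.11 is installed; the attribute names are taken directly from the diff:

from spacy import about

print(about.__version__)        # '2.0.11'
print(about.__release__)        # True
print(about.__download_url__)   # base URL for model package downloads
print(about.__compatibility__)  # URL of the spaCy/model compatibility table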
spacy/lang/it/lemmatizer.py
@@ -333680,7 +333680,7 @@ LOOKUP = {
     "zurliniane": "zurliniano",
     "zurliniani": "zurliniano",
     "àncore": "àncora",
-    "sono": "essere"
+    "sono": "essere",
     "è": "essere",
     "èlites": "èlite",
     "ère": "èra",
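The change here is just the trailing comma after "essere". Without it, Python's implicit string-literal concatenation glues the value to the following key, and the surrounding dict literal no longer parses. A standalone illustration of that failure mode (not spaCy code, just a dict literal evaluated with ast):

import ast

broken = '{"sono": "essere" "è": "essere"}'   # missing comma, as in the old line
fixed  = '{"sono": "essere", "è": "essere"}'  # the corrected form

try:
    ast.literal_eval(broken)
except SyntaxError as err:
    print('broken literal:', err)             # raises SyntaxError

print('fixed literal:', ast.literal_eval(fixed))
# {'sono': 'essere', 'è': 'essere'}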
spacy/language.py
@@ -636,11 +636,11 @@ class Language(object):
         """
         path = util.ensure_path(path)
         deserializers = OrderedDict((
-            ('vocab', lambda p: self.vocab.from_disk(p)),
+            ('meta.json', lambda p: self.meta.update(util.read_json(p))),
+            ('vocab', lambda p: (
+                self.vocab.from_disk(p) and _fix_pretrained_vectors_name(self))),
             ('tokenizer', lambda p: self.tokenizer.from_disk(p, vocab=False)),
-            ('meta.json', lambda p: self.meta.update(util.read_json(p)))
         ))
-        _fix_pretrained_vectors_name(self)
         for name, proc in self.pipeline:
             if name in disable:
                 continue
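The point of the reordering is that the deserializers run in insertion order: meta.json is now loaded before the vocab, so _fix_pretrained_vectors_name can read the metadata when the vocab callback runs. A minimal sketch of that pattern, with made-up loader functions standing in for the real spaCy internals:

from collections import OrderedDict
from pathlib import Path

state = {}

def load_meta(p):
    # runs first; later loaders can rely on state['meta']
    state['meta'] = {'lang': 'it', 'name': 'core_news_sm'}

def load_vocab(p):
    # can safely derive a vectors name from the already-loaded meta
    meta = state['meta']
    state['vectors_name'] = '%s_%s.vectors' % (meta['lang'], meta['name'])

deserializers = OrderedDict((
    ('meta.json', load_meta),
    ('vocab', load_vocab),
))

for name, loader in deserializers.items():
    loader(Path('/tmp/model') / name)

print(state['vectors_name'])  # it_core_news_sm.vectors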
spacy/language.py
@@ -682,11 +682,11 @@ class Language(object):
         RETURNS (Language): The `Language` object.
         """
         deserializers = OrderedDict((
-            ('vocab', lambda b: self.vocab.from_bytes(b)),
+            ('meta', lambda b: self.meta.update(ujson.loads(b))),
+            ('vocab', lambda b: (
+                self.vocab.from_bytes(b) and _fix_pretrained_vectors_name(self))),
             ('tokenizer', lambda b: self.tokenizer.from_bytes(b, vocab=False)),
-            ('meta', lambda b: self.meta.update(ujson.loads(b)))
         ))
-        _fix_pretrained_vectors_name(self)
         for i, (name, proc) in enumerate(self.pipeline):
             if name in disable:
                 continue
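This hunk gives the byte-serialization path the same treatment. A hedged usage example of the API it touches, assuming a spaCy 2.x install (a blank pipeline needs no model download):

import spacy

# serialize a blank Italian pipeline to bytes and restore it;
# from_bytes() is where the reordered deserializers above are applied
nlp = spacy.blank('it')
data = nlp.to_bytes()
nlp2 = spacy.blank('it').from_bytes(data)
print(nlp2.meta['lang'])  # 'it'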
spacy/language.py
@@ -696,6 +696,7 @@ class Language(object):
         msg = util.from_bytes(bytes_data, deserializers, {})
         return self
 
+
 def _fix_pretrained_vectors_name(nlp):
     # TODO: Replace this once we handle vectors consistently as static
     # data
spacy/language.py
@@ -708,12 +709,13 @@ def _fix_pretrained_vectors_name(nlp):
         nlp.vocab.vectors.name = vectors_name
     else:
         raise ValueError(Errors.E092)
+    if nlp.vocab.vectors.size != 0:
+        link_vectors_to_models(nlp.vocab)
     for name, proc in nlp.pipeline:
         if not hasattr(proc, 'cfg'):
             continue
-        if proc.cfg.get('pretrained_dims'):
-            assert nlp.vocab.vectors.name
-            proc.cfg['pretrained_vectors'] = nlp.vocab.vectors.name
+        proc.cfg.setdefault('deprecation_fixes', {})
+        proc.cfg['deprecation_fixes']['vectors_name'] = nlp.vocab.vectors.name
 
 
 class DisabledPipes(list):
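The setdefault call is defensive bookkeeping: it creates the 'deprecation_fixes' sub-dict only when it is missing, so the vectors name can be recorded without clobbering anything a component already stored there. A standalone illustration with a plain dict (the vectors name is made up):

cfg = {'deprecation_fixes': {'other_fix': True}}   # pre-existing entries survive
cfg.setdefault('deprecation_fixes', {})
cfg['deprecation_fixes']['vectors_name'] = 'it_core_news_sm.vectors'
print(cfg)
# {'deprecation_fixes': {'other_fix': True, 'vectors_name': 'it_core_news_sm.vectors'}}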
spacy/syntax/nn_parser.pyx
@@ -546,7 +546,10 @@ cdef class Parser:
         if len(docs) != len(golds):
             raise ValueError(Errors.E077.format(value='update', n_docs=len(docs),
                                                 n_golds=len(golds)))
-        if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= 0.0:
+        # The probability we use beam update, instead of falling back to
+        # a greedy update
+        beam_update_prob = 1-self.cfg.get('beam_update_prob', 0.5)
+        if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= beam_update_prob:
             return self.update_beam(docs, golds,
                 self.cfg['beam_width'], self.cfg['beam_density'],
                 drop=drop, sgd=sgd, losses=losses)
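Since numpy.random.random() >= 1 - p holds with probability p, the new condition takes the beam path with probability cfg['beam_update_prob'] (default 0.5) whenever beam_width >= 2, instead of always. A quick sanity check of that sampling logic, restated in plain Python rather than parser code:

import random

cfg = {'beam_width': 4, 'beam_update_prob': 0.5}
beam_update_prob = 1 - cfg.get('beam_update_prob', 0.5)
trials = 10000
beam_updates = sum(
    1 for _ in range(trials)
    if cfg.get('beam_width', 1) >= 2 and random.random() >= beam_update_prob
)
print(beam_updates / trials)  # ~0.5, i.e. approximately cfg['beam_update_prob']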