commit fb4eda6616
ines 2018-04-06 00:38:48 +02:00
4 changed files with 18 additions and 13 deletions

View File

@@ -3,13 +3,13 @@
 # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
 __title__ = 'spacy'
-__version__ = '2.0.11.dev0'
+__version__ = '2.0.11'
 __summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython'
 __uri__ = 'https://spacy.io'
 __author__ = 'Explosion AI'
 __email__ = 'contact@explosion.ai'
 __license__ = 'MIT'
-__release__ = False
+__release__ = True
 __download_url__ = 'https://github.com/explosion/spacy-models/releases/download'
 __compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json'
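
The release flips two fields in tandem: the `.dev0` suffix comes off `__version__`, and `__release__` becomes `True`. A minimal sketch of how downstream code could read the pair (`describe_build` is a hypothetical helper, not spaCy's actual code):

    # Hypothetical helper, not spaCy's API: report whether the installed
    # build is a tagged release or a development snapshot.
    from spacy import about

    def describe_build():
        kind = 'release' if about.__release__ else 'development'
        return '%s %s (%s build)' % (about.__title__, about.__version__, kind)

    print(describe_build())   # e.g. "spacy 2.0.11 (release build)"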

View File

@@ -333680,7 +333680,7 @@ LOOKUP = {
     "zurliniane": "zurliniano",
     "zurliniani": "zurliniano",
     "àncore": "àncora",
-    "sono": "essere"
+    "sono": "essere",
     "è": "essere",
     "èlites": "èlite",
     "ère": "èra",

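The only change in this file is the trailing comma on `"sono": "essere"`; the `è`/`èlites`/`ère` lines are unchanged context (the hunk header's 7,7 counts confirm a single changed line). Without the comma, Python's implicit string concatenation glues `"essere"` onto the next key and the dict literal fails to parse, so the module cannot be imported. For reference, lookup lemmatization itself is just a dict lookup with the surface form as fallback, roughly:

    # Minimal sketch of table-based lemmatization; this toy table stands in
    # for spaCy's full Italian LOOKUP dict edited above.
    LOOKUP = {
        "sono": "essere",
        "è": "essere",
        "ère": "èra",
    }

    def lemmatize(form):
        # Unknown forms fall back to the surface form itself.
        return LOOKUP.get(form, form)

    assert lemmatize("sono") == "essere"
    assert lemmatize("case") == "case"   # not in the toy table
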
View File

@@ -636,11 +636,11 @@ class Language(object):
         """
         path = util.ensure_path(path)
         deserializers = OrderedDict((
-            ('vocab', lambda p: self.vocab.from_disk(p)),
-            ('meta.json', lambda p: self.meta.update(util.read_json(p))),
+            ('vocab', lambda p: (
+                self.vocab.from_disk(p) and _fix_pretrained_vectors_name(self))),
             ('tokenizer', lambda p: self.tokenizer.from_disk(p, vocab=False)),
+            ('meta.json', lambda p: self.meta.update(util.read_json(p)))
         ))
-        _fix_pretrained_vectors_name(self)
         for name, proc in self.pipeline:
             if name in disable:
                 continue
@@ -682,11 +682,11 @@ class Language(object):
         RETURNS (Language): The `Language` object.
         """
         deserializers = OrderedDict((
-            ('vocab', lambda b: self.vocab.from_bytes(b)),
-            ('meta', lambda b: self.meta.update(ujson.loads(b))),
+            ('vocab', lambda b: (
+                self.vocab.from_bytes(b) and _fix_pretrained_vectors_name(self))),
             ('tokenizer', lambda b: self.tokenizer.from_bytes(b, vocab=False)),
+            ('meta', lambda b: self.meta.update(ujson.loads(b)))
         ))
-        _fix_pretrained_vectors_name(self)
         for i, (name, proc) in enumerate(self.pipeline):
             if name in disable:
                 continue
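
Both hunks make the same change: instead of calling `_fix_pretrained_vectors_name(self)` once after building the OrderedDict, before any data has actually been loaded, the call is chained onto the vocab deserializer with `and`, so it runs immediately after the vocab is populated and before the tokenizer and pipeline components deserialize. `Vocab.from_disk` and `Vocab.from_bytes` return the vocab itself, which is truthy, so the right-hand side of `and` always executes. A minimal sketch of the chaining pattern:

    # Minimal sketch of the `load() and fix()` chaining used above: because
    # load() returns a truthy object (as Vocab.from_disk does), `and`
    # guarantees fix() runs right after it, inside a single lambda.
    events = []

    def load():
        events.append('load')
        return object()      # truthy, like the Vocab that from_disk returns

    def fix():
        events.append('fix')

    step = lambda: (load() and fix())
    step()
    assert events == ['load', 'fix']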
@@ -696,6 +696,7 @@
         msg = util.from_bytes(bytes_data, deserializers, {})
         return self
 
+
 def _fix_pretrained_vectors_name(nlp):
     # TODO: Replace this once we handle vectors consistently as static
     # data
@@ -708,12 +709,13 @@
         nlp.vocab.vectors.name = vectors_name
     else:
         raise ValueError(Errors.E092)
+    if nlp.vocab.vectors.size != 0:
+        link_vectors_to_models(nlp.vocab)
     for name, proc in nlp.pipeline:
         if not hasattr(proc, 'cfg'):
             continue
-        if proc.cfg.get('pretrained_dims'):
-            assert nlp.vocab.vectors.name
-            proc.cfg['pretrained_vectors'] = nlp.vocab.vectors.name
+        proc.cfg.setdefault('deprecation_fixes', {})
+        proc.cfg['deprecation_fixes']['vectors_name'] = nlp.vocab.vectors.name
 
 
 class DisabledPipes(list):
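
Two behavioural changes land in this hunk: non-empty vectors are now re-linked to the models via `link_vectors_to_models` as part of the fix, and the renamed vectors are recorded under a nested `deprecation_fixes` key in each pipe's `cfg` rather than written to `pretrained_vectors` directly. The `dict.setdefault` call makes creating the inner dict idempotent. A minimal sketch of that pattern (the vectors name is illustrative):

    # Sketch of the setdefault pattern above: the nested dict is created
    # once, and repeated fixes just overwrite the same key.
    cfg = {}
    for vectors_name in ('en_model.vectors', 'en_model.vectors'):
        cfg.setdefault('deprecation_fixes', {})
        cfg['deprecation_fixes']['vectors_name'] = vectors_name
    assert cfg == {'deprecation_fixes': {'vectors_name': 'en_model.vectors'}}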

View File

@@ -546,7 +546,10 @@ cdef class Parser:
         if len(docs) != len(golds):
             raise ValueError(Errors.E077.format(value='update', n_docs=len(docs),
                                                 n_golds=len(golds)))
-        if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= 0.0:
+        # The probability we use beam update, instead of falling back to
+        # a greedy update
+        beam_update_prob = 1-self.cfg.get('beam_update_prob', 0.5)
+        if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= beam_update_prob:
             return self.update_beam(docs, golds,
                 self.cfg['beam_width'], self.cfg['beam_density'],
                 drop=drop, sgd=sgd, losses=losses)
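
Since `numpy.random.random()` is uniform on [0, 1), the test `random() >= 1 - p` passes with probability p, so a beam update now runs with probability `cfg['beam_update_prob']` (default 0.5) instead of unconditionally (`>= 0.0` was always true); otherwise the method falls through to the greedy update. Note the local variable stores the complement, the fall-back threshold, despite its name. A quick empirical check of the identity:

    # Empirical check that `random() >= 1 - p` fires with probability ~p.
    import numpy

    p = 0.5                       # cfg.get('beam_update_prob', 0.5)
    threshold = 1 - p             # the `beam_update_prob` local in the diff above
    draws = numpy.random.random(100000)
    rate = (draws >= threshold).mean()
    assert abs(rate - p) < 0.01   # roughly half the updates take the beam path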