This commit is contained in:
ines 2018-04-06 00:38:48 +02:00
commit fb4eda6616
4 changed files with 18 additions and 13 deletions

View File

@@ -3,13 +3,13 @@
# https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
__title__ = 'spacy' __title__ = 'spacy'
__version__ = '2.0.11.dev0' __version__ = '2.0.11'
__summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython' __summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython'
__uri__ = 'https://spacy.io' __uri__ = 'https://spacy.io'
__author__ = 'Explosion AI' __author__ = 'Explosion AI'
__email__ = 'contact@explosion.ai' __email__ = 'contact@explosion.ai'
__license__ = 'MIT' __license__ = 'MIT'
__release__ = False __release__ = True
__download_url__ = 'https://github.com/explosion/spacy-models/releases/download' __download_url__ = 'https://github.com/explosion/spacy-models/releases/download'
__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json' __compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json'

View File

@@ -333680,7 +333680,7 @@ LOOKUP = {
"zurliniane": "zurliniano", "zurliniane": "zurliniano",
"zurliniani": "zurliniano", "zurliniani": "zurliniano",
"àncore": "àncora", "àncore": "àncora",
"sono": "essere" "sono": "essere",
"è": "essere", "è": "essere",
"èlites": "èlite", "èlites": "èlite",
"ère": "èra", "ère": "èra",

View File

@@ -636,11 +636,11 @@ class Language(object):
""" """
path = util.ensure_path(path) path = util.ensure_path(path)
deserializers = OrderedDict(( deserializers = OrderedDict((
('vocab', lambda p: self.vocab.from_disk(p)), ('meta.json', lambda p: self.meta.update(util.read_json(p))),
('vocab', lambda p: (
self.vocab.from_disk(p) and _fix_pretrained_vectors_name(self))),
('tokenizer', lambda p: self.tokenizer.from_disk(p, vocab=False)), ('tokenizer', lambda p: self.tokenizer.from_disk(p, vocab=False)),
('meta.json', lambda p: self.meta.update(util.read_json(p)))
)) ))
_fix_pretrained_vectors_name(self)
for name, proc in self.pipeline: for name, proc in self.pipeline:
if name in disable: if name in disable:
continue continue
@@ -682,11 +682,11 @@ class Language(object):
RETURNS (Language): The `Language` object. RETURNS (Language): The `Language` object.
""" """
deserializers = OrderedDict(( deserializers = OrderedDict((
('vocab', lambda b: self.vocab.from_bytes(b)), ('meta', lambda b: self.meta.update(ujson.loads(b))),
('vocab', lambda b: (
self.vocab.from_bytes(b) and _fix_pretrained_vectors_name(self))),
('tokenizer', lambda b: self.tokenizer.from_bytes(b, vocab=False)), ('tokenizer', lambda b: self.tokenizer.from_bytes(b, vocab=False)),
('meta', lambda b: self.meta.update(ujson.loads(b)))
)) ))
_fix_pretrained_vectors_name(self)
for i, (name, proc) in enumerate(self.pipeline): for i, (name, proc) in enumerate(self.pipeline):
if name in disable: if name in disable:
continue continue
@@ -696,6 +696,7 @@ class Language(object):
msg = util.from_bytes(bytes_data, deserializers, {}) msg = util.from_bytes(bytes_data, deserializers, {})
return self return self
def _fix_pretrained_vectors_name(nlp): def _fix_pretrained_vectors_name(nlp):
# TODO: Replace this once we handle vectors consistently as static # TODO: Replace this once we handle vectors consistently as static
# data # data
@@ -708,12 +709,13 @@ def _fix_pretrained_vectors_name(nlp):
nlp.vocab.vectors.name = vectors_name nlp.vocab.vectors.name = vectors_name
else: else:
raise ValueError(Errors.E092) raise ValueError(Errors.E092)
if nlp.vocab.vectors.size != 0:
link_vectors_to_models(nlp.vocab)
for name, proc in nlp.pipeline: for name, proc in nlp.pipeline:
if not hasattr(proc, 'cfg'): if not hasattr(proc, 'cfg'):
continue continue
if proc.cfg.get('pretrained_dims'): proc.cfg.setdefault('deprecation_fixes', {})
assert nlp.vocab.vectors.name proc.cfg['deprecation_fixes']['vectors_name'] = nlp.vocab.vectors.name
proc.cfg['pretrained_vectors'] = nlp.vocab.vectors.name
class DisabledPipes(list): class DisabledPipes(list):

View File

@@ -546,7 +546,10 @@ cdef class Parser:
if len(docs) != len(golds): if len(docs) != len(golds):
raise ValueError(Errors.E077.format(value='update', n_docs=len(docs), raise ValueError(Errors.E077.format(value='update', n_docs=len(docs),
n_golds=len(golds))) n_golds=len(golds)))
if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= 0.0: # The probability we use beam update, instead of falling back to
# a greedy update
beam_update_prob = 1-self.cfg.get('beam_update_prob', 0.5)
if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= beam_update_prob:
return self.update_beam(docs, golds, return self.update_beam(docs, golds,
self.cfg['beam_width'], self.cfg['beam_density'], self.cfg['beam_width'], self.cfg['beam_density'],
drop=drop, sgd=sgd, losses=losses) drop=drop, sgd=sgd, losses=losses)