commit fb4eda6616
ines 2018-04-06 00:38:48 +02:00
4 changed files with 18 additions and 13 deletions

View File

@@ -3,13 +3,13 @@
 # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
 __title__ = 'spacy'
-__version__ = '2.0.11.dev0'
+__version__ = '2.0.11'
 __summary__ = 'Industrial-strength Natural Language Processing (NLP) with Python and Cython'
 __uri__ = 'https://spacy.io'
 __author__ = 'Explosion AI'
 __email__ = 'contact@explosion.ai'
 __license__ = 'MIT'
-__release__ = False
+__release__ = True
 __download_url__ = 'https://github.com/explosion/spacy-models/releases/download'
 __compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json'
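
The release flips two fields in tandem: the `.dev0` suffix comes off `__version__`, and `__release__` becomes `True`. A minimal sketch of how downstream code could read the pair (`describe_build` is a hypothetical helper, not spaCy's actual code):

    # Hypothetical helper, not spaCy's API: report whether the installed
    # build is a tagged release or a development snapshot.
    from spacy import about

    def describe_build():
        kind = 'release' if about.__release__ else 'development'
        return '%s %s (%s build)' % (about.__title__, about.__version__, kind)

    print(describe_build())   # e.g. "spacy 2.0.11 (release build)"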

View File

@@ -333680,7 +333680,7 @@ LOOKUP = {
     "zurliniane": "zurliniano",
     "zurliniani": "zurliniano",
     "àncore": "àncora",
-    "sono": "essere"
+    "sono": "essere",
     "è": "essere",
     "èlites": "èlite",
     "ère": "èra",

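The only change in this file is the trailing comma on `"sono": "essere"`; the `è`/`èlites`/`ère` lines are unchanged context (the hunk header's 7,7 counts confirm a single changed line). Without the comma, Python's implicit string concatenation glues `"essere"` onto the next key and the dict literal fails to parse, so the module cannot be imported. For reference, lookup lemmatization itself is just a dict lookup with the surface form as fallback, roughly:

    # Minimal sketch of table-based lemmatization; this toy table stands in
    # for spaCy's full Italian LOOKUP dict edited above.
    LOOKUP = {
        "sono": "essere",
        "è": "essere",
        "ère": "èra",
    }

    def lemmatize(form):
        # Unknown forms fall back to the surface form itself.
        return LOOKUP.get(form, form)

    assert lemmatize("sono") == "essere"
    assert lemmatize("case") == "case"   # not in the toy table
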
View File

@@ -636,11 +636,11 @@ class Language(object):
         """
         path = util.ensure_path(path)
         deserializers = OrderedDict((
-            ('vocab', lambda p: self.vocab.from_disk(p)),
-            ('meta.json', lambda p: self.meta.update(util.read_json(p))),
+            ('vocab', lambda p: (
+                self.vocab.from_disk(p) and _fix_pretrained_vectors_name(self))),
             ('tokenizer', lambda p: self.tokenizer.from_disk(p, vocab=False)),
+            ('meta.json', lambda p: self.meta.update(util.read_json(p)))
         ))
-        _fix_pretrained_vectors_name(self)
         for name, proc in self.pipeline:
             if name in disable:
                 continue
@@ -682,11 +682,11 @@ class Language(object):
         RETURNS (Language): The `Language` object.
         """
         deserializers = OrderedDict((
-            ('vocab', lambda b: self.vocab.from_bytes(b)),
-            ('meta', lambda b: self.meta.update(ujson.loads(b))),
+            ('vocab', lambda b: (
+                self.vocab.from_bytes(b) and _fix_pretrained_vectors_name(self))),
             ('tokenizer', lambda b: self.tokenizer.from_bytes(b, vocab=False)),
+            ('meta', lambda b: self.meta.update(ujson.loads(b)))
         ))
-        _fix_pretrained_vectors_name(self)
         for i, (name, proc) in enumerate(self.pipeline):
             if name in disable:
                 continue
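
Both hunks make the same change: instead of calling `_fix_pretrained_vectors_name(self)` once after building the OrderedDict, before any data has actually been loaded, the call is chained onto the vocab deserializer with `and`, so it runs immediately after the vocab is populated and before the tokenizer and pipeline components deserialize. `Vocab.from_disk` and `Vocab.from_bytes` return the vocab itself, which is truthy, so the right-hand side of `and` always executes. A minimal sketch of the chaining pattern:

    # Minimal sketch of the `load() and fix()` chaining used above: because
    # load() returns a truthy object (as Vocab.from_disk does), `and`
    # guarantees fix() runs right after it, inside a single lambda.
    events = []

    def load():
        events.append('load')
        return object()      # truthy, like the Vocab that from_disk returns

    def fix():
        events.append('fix')

    step = lambda: (load() and fix())
    step()
    assert events == ['load', 'fix']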
@@ -696,6 +696,7 @@
         msg = util.from_bytes(bytes_data, deserializers, {})
         return self
 
+
 def _fix_pretrained_vectors_name(nlp):
     # TODO: Replace this once we handle vectors consistently as static
     # data
@@ -708,12 +709,13 @@
         nlp.vocab.vectors.name = vectors_name
     else:
         raise ValueError(Errors.E092)
+    if nlp.vocab.vectors.size != 0:
+        link_vectors_to_models(nlp.vocab)
     for name, proc in nlp.pipeline:
         if not hasattr(proc, 'cfg'):
             continue
-        if proc.cfg.get('pretrained_dims'):
-            assert nlp.vocab.vectors.name
-            proc.cfg['pretrained_vectors'] = nlp.vocab.vectors.name
+        proc.cfg.setdefault('deprecation_fixes', {})
+        proc.cfg['deprecation_fixes']['vectors_name'] = nlp.vocab.vectors.name
 
 
 class DisabledPipes(list):
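
Two behavioural changes land in this hunk: non-empty vectors are now re-linked to the models via `link_vectors_to_models` as part of the fix, and the renamed vectors are recorded under a nested `deprecation_fixes` key in each pipe's `cfg` rather than written to `pretrained_vectors` directly. The `dict.setdefault` call makes creating the inner dict idempotent. A minimal sketch of that pattern (the vectors name is illustrative):

    # Sketch of the setdefault pattern above: the nested dict is created
    # once, and repeated fixes just overwrite the same key.
    cfg = {}
    for vectors_name in ('en_model.vectors', 'en_model.vectors'):
        cfg.setdefault('deprecation_fixes', {})
        cfg['deprecation_fixes']['vectors_name'] = vectors_name
    assert cfg == {'deprecation_fixes': {'vectors_name': 'en_model.vectors'}}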

View File

@@ -546,7 +546,10 @@ cdef class Parser:
         if len(docs) != len(golds):
             raise ValueError(Errors.E077.format(value='update', n_docs=len(docs),
                                                 n_golds=len(golds)))
-        if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= 0.0:
+        # The probability we use beam update, instead of falling back to
+        # a greedy update
+        beam_update_prob = 1-self.cfg.get('beam_update_prob', 0.5)
+        if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= beam_update_prob:
             return self.update_beam(docs, golds,
                 self.cfg['beam_width'], self.cfg['beam_density'],
                 drop=drop, sgd=sgd, losses=losses)
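
Since `numpy.random.random()` is uniform on [0, 1), the test `random() >= 1 - p` passes with probability p, so a beam update now runs with probability `cfg['beam_update_prob']` (default 0.5) instead of unconditionally (`>= 0.0` was always true); otherwise the method falls through to the greedy update. Note the local variable stores the complement, the fall-back threshold, despite its name. A quick empirical check of the identity:

    # Empirical check that `random() >= 1 - p` fires with probability ~p.
    import numpy

    p = 0.5                       # cfg.get('beam_update_prob', 0.5)
    threshold = 1 - p             # the `beam_update_prob` local in the diff above
    draws = numpy.random.random(100000)
    rate = (draws >= threshold).mean()
    assert abs(rate - p) < 0.01   # roughly half the updates take the beam path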