diff --git a/spacy/_ml.py b/spacy/_ml.py index 76d8fd678..8adacdfda 100644 --- a/spacy/_ml.py +++ b/spacy/_ml.py @@ -382,7 +382,8 @@ def fine_tune(embedding, combine=None): model.d_mix[1] += flat_vecs.dot(flat_grad.T).sum() bp_vecs([d_o * model.mix[1] for d_o in d_output], sgd=sgd) - sgd(model._mem.weights, model._mem.gradient, key=model.id) + if sgd is not None: + sgd(model._mem.weights, model._mem.gradient, key=model.id) return [d_o * model.mix[0] for d_o in d_output] return output, fine_tune_bwd diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 675ae8cee..28ae07865 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -24,28 +24,29 @@ def download(cmd, model, direct=False): with version. """ if direct: - download_model('{m}/{m}.tar.gz'.format(m=model)) + dl = download_model('{m}/{m}.tar.gz'.format(m=model)) else: shortcuts = get_json(about.__shortcuts__, "available shortcuts") model_name = shortcuts.get(model, model) compatibility = get_compatibility() version = get_version(model_name, compatibility) - download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version)) - try: - # Get package path here because link uses - # pip.get_installed_distributions() to check if model is a package, - # which fails if model was just installed via subprocess - package_path = get_package_path(model_name) - link(None, model_name, model, force=True, model_path=package_path) - except: - # Dirty, but since spacy.download and the auto-linking is mostly - # a convenience wrapper, it's best to show a success message and - # loading instructions, even if linking fails. - prints("Creating a shortcut link for 'en' didn't work (maybe you " - "don't have admin permissions?), but you can still load " - "the model via its full package name:", - "nlp = spacy.load('%s')" % model_name, - title="Download successful") + dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version)) + if dl == 0: + try: + # Get package path here because link uses + # pip.get_installed_distributions() to check if model is a package, + # which fails if model was just installed via subprocess + package_path = get_package_path(model_name) + link(None, model_name, model, force=True, model_path=package_path) + except: + # Dirty, but since spacy.download and the auto-linking is mostly + # a convenience wrapper, it's best to show a success message and + # loading instructions, even if linking fails. + prints("Creating a shortcut link for 'en' didn't work (maybe you " + "don't have admin permissions?), but you can still load " + "the model via its full package name:", + "nlp = spacy.load('%s')" % model_name, + title="Download successful") def get_json(url, desc): @@ -77,6 +78,6 @@ def get_version(model, comp): def download_model(filename): download_url = about.__download_url__ + '/' + filename - subprocess.call([sys.executable, '-m', + return subprocess.call([sys.executable, '-m', 'pip', 'install', '--no-cache-dir', download_url], env=os.environ.copy()) diff --git a/spacy/lang/lex_attrs.py b/spacy/lang/lex_attrs.py index 4c3284b1e..d4beebd26 100644 --- a/spacy/lang/lex_attrs.py +++ b/spacy/lang/lex_attrs.py @@ -137,6 +137,7 @@ LEX_ATTRS = { attrs.IS_UPPER: lambda string: string.isupper(), attrs.IS_STOP: lambda string: False, attrs.IS_OOV: lambda string: True, + attrs.PROB: lambda string: -20., attrs.LIKE_EMAIL: like_email, attrs.LIKE_NUM: like_num, attrs.IS_PUNCT: is_punct, diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index bcd84d184..f0f5c6398 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -171,6 +171,8 @@ cdef class Lexeme: property rank: def __get__(self): return self.c.id + def __set__(self, value): + self.c.id = value property sentiment: def __get__(self): diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx index aab350d76..cdf17857b 100644 --- a/spacy/syntax/arc_eager.pyx +++ b/spacy/syntax/arc_eager.pyx @@ -20,7 +20,7 @@ from .transition_system cimport do_func_t, get_cost_func_t from .transition_system cimport move_cost_func_t, label_cost_func_t from ..gold cimport GoldParse from ..gold cimport GoldParseC -from ..attrs cimport TAG, HEAD, DEP, ENT_IOB, ENT_TYPE, IS_SPACE +from ..attrs cimport TAG, HEAD, DEP, ENT_IOB, ENT_TYPE, IS_SPACE, IS_PUNCT from ..lexeme cimport Lexeme from ..structs cimport TokenC @@ -249,7 +249,13 @@ cdef class Break: elif st.stack_depth() < 1: return False else: - return True + prev = st.B_(0)-1 + if Lexeme.c_check_flag(prev.lex, IS_PUNCT): + return True + elif Lexeme.c_check_flag(prev.lex, IS_SPACE): + return True + else: + return False @staticmethod cdef int transition(StateC* st, attr_t label) nogil: