Merge branch 'develop' of https://github.com/explosion/spaCy into develop

This commit is contained in:
Matthew Honnibal 2017-08-25 12:37:16 -05:00
commit 6d4e8e14ca
5 changed files with 32 additions and 21 deletions

View File

@ -382,7 +382,8 @@ def fine_tune(embedding, combine=None):
model.d_mix[1] += flat_vecs.dot(flat_grad.T).sum() model.d_mix[1] += flat_vecs.dot(flat_grad.T).sum()
bp_vecs([d_o * model.mix[1] for d_o in d_output], sgd=sgd) bp_vecs([d_o * model.mix[1] for d_o in d_output], sgd=sgd)
sgd(model._mem.weights, model._mem.gradient, key=model.id) if sgd is not None:
sgd(model._mem.weights, model._mem.gradient, key=model.id)
return [d_o * model.mix[0] for d_o in d_output] return [d_o * model.mix[0] for d_o in d_output]
return output, fine_tune_bwd return output, fine_tune_bwd

View File

@ -24,28 +24,29 @@ def download(cmd, model, direct=False):
with version. with version.
""" """
if direct: if direct:
download_model('{m}/{m}.tar.gz'.format(m=model)) dl = download_model('{m}/{m}.tar.gz'.format(m=model))
else: else:
shortcuts = get_json(about.__shortcuts__, "available shortcuts") shortcuts = get_json(about.__shortcuts__, "available shortcuts")
model_name = shortcuts.get(model, model) model_name = shortcuts.get(model, model)
compatibility = get_compatibility() compatibility = get_compatibility()
version = get_version(model_name, compatibility) version = get_version(model_name, compatibility)
download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version)) dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
try: if dl == 0:
# Get package path here because link uses try:
# pip.get_installed_distributions() to check if model is a package, # Get package path here because link uses
# which fails if model was just installed via subprocess # pip.get_installed_distributions() to check if model is a package,
package_path = get_package_path(model_name) # which fails if model was just installed via subprocess
link(None, model_name, model, force=True, model_path=package_path) package_path = get_package_path(model_name)
except: link(None, model_name, model, force=True, model_path=package_path)
# Dirty, but since spacy.download and the auto-linking is mostly except:
# a convenience wrapper, it's best to show a success message and # Dirty, but since spacy.download and the auto-linking is mostly
# loading instructions, even if linking fails. # a convenience wrapper, it's best to show a success message and
prints("Creating a shortcut link for 'en' didn't work (maybe you " # loading instructions, even if linking fails.
"don't have admin permissions?), but you can still load " prints("Creating a shortcut link for 'en' didn't work (maybe you "
"the model via its full package name:", "don't have admin permissions?), but you can still load "
"nlp = spacy.load('%s')" % model_name, "the model via its full package name:",
title="Download successful") "nlp = spacy.load('%s')" % model_name,
title="Download successful")
def get_json(url, desc): def get_json(url, desc):
@ -77,6 +78,6 @@ def get_version(model, comp):
def download_model(filename): def download_model(filename):
download_url = about.__download_url__ + '/' + filename download_url = about.__download_url__ + '/' + filename
subprocess.call([sys.executable, '-m', return subprocess.call([sys.executable, '-m',
'pip', 'install', '--no-cache-dir', download_url], 'pip', 'install', '--no-cache-dir', download_url],
env=os.environ.copy()) env=os.environ.copy())

View File

@ -137,6 +137,7 @@ LEX_ATTRS = {
attrs.IS_UPPER: lambda string: string.isupper(), attrs.IS_UPPER: lambda string: string.isupper(),
attrs.IS_STOP: lambda string: False, attrs.IS_STOP: lambda string: False,
attrs.IS_OOV: lambda string: True, attrs.IS_OOV: lambda string: True,
attrs.PROB: lambda string: -20.,
attrs.LIKE_EMAIL: like_email, attrs.LIKE_EMAIL: like_email,
attrs.LIKE_NUM: like_num, attrs.LIKE_NUM: like_num,
attrs.IS_PUNCT: is_punct, attrs.IS_PUNCT: is_punct,

View File

@ -171,6 +171,8 @@ cdef class Lexeme:
property rank: property rank:
def __get__(self): def __get__(self):
return self.c.id return self.c.id
def __set__(self, value):
self.c.id = value
property sentiment: property sentiment:
def __get__(self): def __get__(self):

View File

@ -20,7 +20,7 @@ from .transition_system cimport do_func_t, get_cost_func_t
from .transition_system cimport move_cost_func_t, label_cost_func_t from .transition_system cimport move_cost_func_t, label_cost_func_t
from ..gold cimport GoldParse from ..gold cimport GoldParse
from ..gold cimport GoldParseC from ..gold cimport GoldParseC
from ..attrs cimport TAG, HEAD, DEP, ENT_IOB, ENT_TYPE, IS_SPACE from ..attrs cimport TAG, HEAD, DEP, ENT_IOB, ENT_TYPE, IS_SPACE, IS_PUNCT
from ..lexeme cimport Lexeme from ..lexeme cimport Lexeme
from ..structs cimport TokenC from ..structs cimport TokenC
@ -249,7 +249,13 @@ cdef class Break:
elif st.stack_depth() < 1: elif st.stack_depth() < 1:
return False return False
else: else:
return True prev = st.B_(0)-1
if Lexeme.c_check_flag(prev.lex, IS_PUNCT):
return True
elif Lexeme.c_check_flag(prev.lex, IS_SPACE):
return True
else:
return False
@staticmethod @staticmethod
cdef int transition(StateC* st, attr_t label) nogil: cdef int transition(StateC* st, attr_t label) nogil: