Merge branch 'master' of ssh://github.com/spacy-io/spaCy

Matthew Honnibal 2016-09-21 12:08:25 +02:00
commit 6b202ec43f
4 changed files with 11 additions and 10 deletions

View File

@@ -5,7 +5,7 @@ cymem>=1.30,<1.32
 preshed>=0.46.1,<0.47.0
 thinc>=5.0.0,<5.1.0
 murmurhash>=0.26,<0.27
-plac
+plac<0.9.3
 six
 ujson
 cloudpickle

View File

@@ -9,15 +9,16 @@ from sputnik.package_list import (PackageNotFoundException,
 from . import about


-def download(lang, force=False):
+def download(lang, force=False, fail_on_exist=True):
     if force:
         sputnik.purge(about.__title__, about.__version__)

     try:
         sputnik.package(about.__title__, about.__version__, about.__models__[lang])
-        print("Model already installed. Please run 'python -m "
-              "spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
-        sys.exit(1)
+        if fail_on_exist:
+            print("Model already installed. Please run 'python -m "
+                  "spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
+            sys.exit(0)
     except (PackageNotFoundException, CompatiblePackageNotFoundException):
         pass
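
In effect, the new fail_on_exist flag lets a caller treat an already-installed model as a no-op instead of a fatal condition, and the exit status for that case changes from 1 to 0. A minimal sketch of how a caller might use it, assuming this hunk is spacy/download.py and using 'en' purely for illustration:

    # Hedged sketch; the import path is a guess based on the hunk above.
    from spacy.download import download

    # With fail_on_exist=False an already-installed model no longer prints the
    # "Model already installed" message or exits, so an installer or test
    # harness can call this unconditionally before loading the pipeline.
    download('en', force=False, fail_on_exist=False)

    # The default (fail_on_exist=True) keeps the message for interactive use,
    # but the process now exits with status 0 instead of 1.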

View File

@@ -37,7 +37,7 @@ p Here's how the current pre-processing function looks, at the time of writing.
         # Merge them into single tokens
         ent.merge(ent.root.tag_, ent.text, ent.label_)
     token_strings = []
-    for token in tokens:
+    for token in doc:
         text = token.text.replace(' ', '_')
         tag = token.ent_type_ or token.pos_
         token_strings.append('%s|%s' % (text, tag))
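
The fix here replaces the undefined name `tokens` with the processed Doc itself. For context, a self-contained sketch of the surrounding function, reconstructed from the visible context lines (the name transform_doc is an assumption, not taken from the docs page):

    def transform_doc(doc):
        # Merge each entity span into a single token, keeping the root's tag.
        for ent in doc.ents:
            ent.merge(ent.root.tag_, ent.text, ent.label_)
        token_strings = []
        # The fix: iterate over the Doc itself rather than an undefined `tokens`.
        for token in doc:
            text = token.text.replace(' ', '_')
            tag = token.ent_type_ or token.pos_
            token_strings.append('%s|%s' % (text, tag))
        return ' '.join(token_strings)

    # Example usage: transform_doc(nlp(u'Apple is looking at buying a U.K. startup'))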

View File

@@ -46,13 +46,13 @@
     assert token.shape_ == 'Xxxxx'

     for lexeme in nlp.vocab:
         if lexeme.is_alpha:
-            lexeme.shape_ = 'W'
+            lexeme.shape_ = u'W'
         elif lexeme.is_digit:
-            lexeme.shape_ = 'D'
+            lexeme.shape_ = u'D'
         elif lexeme.is_punct:
-            lexeme.shape_ = 'P'
+            lexeme.shape_ = u'P'
         else:
-            lexeme.shape_ = 'M'
+            lexeme.shape_ = u'M'
     assert token.shape_ == 'W'

 +h3('examples-numpy-arrays') Export to numpy arrays
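
The only change in this hunk is the u'' prefix on the literals: under Python 2 a bare 'W' is a byte string, while spaCy's writable string attributes such as shape_ expect unicode text. A short illustration of the same effect, assuming the nlp object from the surrounding docs example:

    # Importing unicode_literals is an alternative to prefixing each literal.
    from __future__ import unicode_literals

    for lexeme in nlp.vocab:
        # 'W' is now unicode on Python 2 as well, matching what the
        # writable .shape_ attribute expects.
        lexeme.shape_ = 'W' if lexeme.is_alpha else 'M'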