Merge branch 'master' of ssh://github.com/spacy-io/spaCy

This commit is contained in:
Matthew Honnibal 2016-09-21 12:08:25 +02:00
commit 6b202ec43f
4 changed files with 11 additions and 10 deletions

View File

@@ -5,7 +5,7 @@ cymem>=1.30,<1.32
 preshed>=0.46.1,<0.47.0
 thinc>=5.0.0,<5.1.0
 murmurhash>=0.26,<0.27
-plac
+plac<0.9.3
 six
 ujson
 cloudpickle

View File

@@ -9,15 +9,16 @@ from sputnik.package_list import (PackageNotFoundException,
 from . import about
-def download(lang, force=False):
+def download(lang, force=False, fail_on_exist=True):
 if force:
 sputnik.purge(about.__title__, about.__version__)
 try:
 sputnik.package(about.__title__, about.__version__, about.__models__[lang])
-print("Model already installed. Please run 'python -m "
-"spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
-sys.exit(1)
+if fail_on_exist:
+print("Model already installed. Please run 'python -m "
+"spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
+sys.exit(0)
 except (PackageNotFoundException, CompatiblePackageNotFoundException):
 pass

View File

@@ -37,7 +37,7 @@ p Here's how the current pre-processing function looks, at the time of writing.
 # Merge them into single tokens
 ent.merge(ent.root.tag_, ent.text, ent.label_)
 token_strings = []
-for token in tokens:
+for token in doc:
 text = token.text.replace(' ', '_')
 tag = token.ent_type_ or token.pos_
 token_strings.append('%s|%s' % (text, tag))

View File

@@ -46,13 +46,13 @@
 assert token.shape_ == 'Xxxxx'
 for lexeme in nlp.vocab:
 if lexeme.is_alpha:
-lexeme.shape_ = 'W'
+lexeme.shape_ = u'W'
 elif lexeme.is_digit:
-lexeme.shape_ = 'D'
+lexeme.shape_ = u'D'
 elif lexeme.is_punct:
-lexeme.shape_ = 'P'
+lexeme.shape_ = u'P'
 else:
-lexeme.shape_ = 'M'
+lexeme.shape_ = u'M'
 assert token.shape_ == 'W'
 +h3('examples-numpy-arrays') Export to numpy arrays