mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-13 13:17:06 +03:00
Merge branch 'master' of ssh://github.com/spacy-io/spaCy
This commit is contained in:
commit
6b202ec43f
|
@ -5,7 +5,7 @@ cymem>=1.30,<1.32
|
||||||
preshed>=0.46.1,<0.47.0
|
preshed>=0.46.1,<0.47.0
|
||||||
thinc>=5.0.0,<5.1.0
|
thinc>=5.0.0,<5.1.0
|
||||||
murmurhash>=0.26,<0.27
|
murmurhash>=0.26,<0.27
|
||||||
plac
|
plac<0.9.3
|
||||||
six
|
six
|
||||||
ujson
|
ujson
|
||||||
cloudpickle
|
cloudpickle
|
||||||
|
|
|
@ -9,15 +9,16 @@ from sputnik.package_list import (PackageNotFoundException,
|
||||||
from . import about
|
from . import about
|
||||||
|
|
||||||
|
|
||||||
def download(lang, force=False):
|
def download(lang, force=False, fail_on_exist=True):
|
||||||
if force:
|
if force:
|
||||||
sputnik.purge(about.__title__, about.__version__)
|
sputnik.purge(about.__title__, about.__version__)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sputnik.package(about.__title__, about.__version__, about.__models__[lang])
|
sputnik.package(about.__title__, about.__version__, about.__models__[lang])
|
||||||
print("Model already installed. Please run 'python -m "
|
if fail_on_exist:
|
||||||
"spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
|
print("Model already installed. Please run 'python -m "
|
||||||
sys.exit(1)
|
"spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
|
||||||
|
sys.exit(0)
|
||||||
except (PackageNotFoundException, CompatiblePackageNotFoundException):
|
except (PackageNotFoundException, CompatiblePackageNotFoundException):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
@ -37,7 +37,7 @@ p Here's how the current pre-processing function looks, at the time of writing.
|
||||||
# Merge them into single tokens
|
# Merge them into single tokens
|
||||||
ent.merge(ent.root.tag_, ent.text, ent.label_)
|
ent.merge(ent.root.tag_, ent.text, ent.label_)
|
||||||
token_strings = []
|
token_strings = []
|
||||||
for token in tokens:
|
for token in doc:
|
||||||
text = token.text.replace(' ', '_')
|
text = token.text.replace(' ', '_')
|
||||||
tag = token.ent_type_ or token.pos_
|
tag = token.ent_type_ or token.pos_
|
||||||
token_strings.append('%s|%s' % (text, tag))
|
token_strings.append('%s|%s' % (text, tag))
|
||||||
|
|
|
@ -46,13 +46,13 @@
|
||||||
assert token.shape_ == 'Xxxxx'
|
assert token.shape_ == 'Xxxxx'
|
||||||
for lexeme in nlp.vocab:
|
for lexeme in nlp.vocab:
|
||||||
if lexeme.is_alpha:
|
if lexeme.is_alpha:
|
||||||
lexeme.shape_ = 'W'
|
lexeme.shape_ = u'W'
|
||||||
elif lexeme.is_digit:
|
elif lexeme.is_digit:
|
||||||
lexeme.shape_ = 'D'
|
lexeme.shape_ = u'D'
|
||||||
elif lexeme.is_punct:
|
elif lexeme.is_punct:
|
||||||
lexeme.shape_ = 'P'
|
lexeme.shape_ = u'P'
|
||||||
else:
|
else:
|
||||||
lexeme.shape_ = 'M'
|
lexeme.shape_ = u'M'
|
||||||
assert token.shape_ == 'W'
|
assert token.shape_ == 'W'
|
||||||
|
|
||||||
+h3('examples-numpy-arrays') Export to numpy arrays
|
+h3('examples-numpy-arrays') Export to numpy arrays
|
||||||
|
|
Loading…
Reference in New Issue
Block a user