	Merge branch 'master' of ssh://github.com/spacy-io/spaCy
commit 6b202ec43f
@@ -5,7 +5,7 @@ cymem>=1.30,<1.32
 preshed>=0.46.1,<0.47.0
 thinc>=5.0.0,<5.1.0
 murmurhash>=0.26,<0.27
-plac
+plac<0.9.3
 six
 ujson
 cloudpickle
@@ -9,15 +9,16 @@ from sputnik.package_list import (PackageNotFoundException,
 from . import about


-def download(lang, force=False):
+def download(lang, force=False, fail_on_exist=True):
     if force:
         sputnik.purge(about.__title__, about.__version__)

     try:
         sputnik.package(about.__title__, about.__version__, about.__models__[lang])
-        print("Model already installed. Please run 'python -m "
-              "spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
-        sys.exit(1)
+        if fail_on_exist:
+            print("Model already installed. Please run 'python -m "
+                  "spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
+            sys.exit(0)
     except (PackageNotFoundException, CompatiblePackageNotFoundException):
         pass

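The new fail_on_exist flag lets a caller decide what happens when the model is already installed: by default the function still prints the notice and exits (now with status 0 instead of 1), but passing fail_on_exist=False skips that path so the calling code keeps control. A minimal usage sketch, assuming the function above is importable as spacy.download.download and that 'en' is a key in about.__models__:

    from spacy.download import download

    # Default behaviour (fail_on_exist=True): print the
    # "Model already installed" notice and exit.
    download('en')

    # An already-installed model no longer triggers the
    # message-and-exit path; the caller keeps running.
    download('en', fail_on_exist=False)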
@@ -37,7 +37,7 @@ p Here's how the current pre-processing function looks, at the time of writing.
                         # Merge them into single tokens
                         ent.merge(ent.root.tag_, ent.text, ent.label_)
             token_strings = []
-            for token in tokens:
+            for token in doc:
                 text = token.text.replace(' ', '_')
                 tag = token.ent_type_ or token.pos_
                 token_strings.append('%s|%s' % (text, tag))
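The fix replaces the undefined name tokens with doc, the Doc object produced by the pipeline. For reference, a self-contained sketch of the corrected loop; the function name represent_doc and the final join are illustrative, not taken from the docs:

    def represent_doc(doc):
        # Build 'text|TAG' strings for every token, after entities
        # have been merged into single tokens as shown above.
        token_strings = []
        for token in doc:
            text = token.text.replace(' ', '_')
            tag = token.ent_type_ or token.pos_
            token_strings.append('%s|%s' % (text, tag))
        return ' '.join(token_strings)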
@@ -46,13 +46,13 @@
         assert token.shape_ == 'Xxxxx'
         for lexeme in nlp.vocab:
             if lexeme.is_alpha:
-                lexeme.shape_ = 'W'
+                lexeme.shape_ = u'W'
             elif lexeme.is_digit:
-                lexeme.shape_ = 'D'
+                lexeme.shape_ = u'D'
             elif lexeme.is_punct:
-                lexeme.shape_ = 'P'
+                lexeme.shape_ = u'P'
             else:
-                lexeme.shape_ = 'M'
+                lexeme.shape_ = u'M'
         assert token.shape_ == 'W'

     +h3('examples-numpy-arrays') Export to numpy arrays
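The u prefixes keep the example correct under Python 2, where a bare string literal is a byte string while spaCy's string-valued attributes expect unicode. A minimal sketch of the same fix using a __future__ import instead of per-literal prefixes; the spacy.en.English loading style reflects the API of this era and should be treated as an assumption:

    # -*- coding: utf-8 -*-
    from __future__ import unicode_literals

    from spacy.en import English

    nlp = English()
    lexeme = nlp.vocab['Hello']
    # With unicode_literals in effect, 'W' is already a unicode string,
    # so this assignment behaves like the u'W' form used in the docs.
    lexeme.shape_ = 'W'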