mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-24 20:51:30 +03:00 
			
		
		
		
	Merge branch 'master' of https://github.com/explosion/spaCy
This commit is contained in:
		
						commit
						ed39c75a92
					
				|  | @ -1,6 +1,7 @@ | |||
| # coding: utf8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| 
 | ||||
| class Messages(object): | ||||
|     M001 = ("Download successful but linking failed") | ||||
|     M002 = ("Creating a shortcut link for 'en' didn't work (maybe you " | ||||
|  | @ -64,7 +65,7 @@ class Messages(object): | |||
|             "flag to overwrite existing directories.") | ||||
|     M046 = ("Generating meta.json") | ||||
|     M047 = ("Enter the package settings for your model. The following " | ||||
|            "information will be read from your model data: pipeline, vectors.") | ||||
|             "information will be read from your model data: pipeline, vectors.") | ||||
|     M048 = ("No '{key}' setting found in meta.json") | ||||
|     M049 = ("This setting is required to build your package.") | ||||
|     M050 = ("Training data not found") | ||||
|  |  | |||
|  | @ -29,12 +29,14 @@ except ImportError: | |||
|     freqs_loc=("location of words frequencies file", "positional", None, Path), | ||||
|     clusters_loc=("optional: location of brown clusters data", | ||||
|                   "option", "c", str), | ||||
|     vectors_loc=("optional: location of vectors file in GenSim text format", | ||||
|                  "option", "v", str), | ||||
|     vectors_loc=("optional: location of vectors file in Word2Vec format " | ||||
|                  "(either as .txt or zipped as .zip or .tar.gz)", "option", | ||||
|                  "v", str), | ||||
|     prune_vectors=("optional: number of vectors to prune to", | ||||
|                    "option", "V", int) | ||||
| ) | ||||
| def init_model(lang, output_dir, freqs_loc=None, clusters_loc=None, vectors_loc=None, prune_vectors=-1): | ||||
| def init_model(lang, output_dir, freqs_loc=None, clusters_loc=None, | ||||
|                vectors_loc=None, prune_vectors=-1): | ||||
|     """ | ||||
|     Create a new model from raw data, like word frequencies, Brown clusters | ||||
|     and word vectors. | ||||
|  | @ -114,7 +116,7 @@ def read_vectors(vectors_loc): | |||
|         pieces = line.rsplit(' ', vectors_data.shape[1]+1) | ||||
|         word = pieces.pop(0) | ||||
|         if len(pieces) != vectors_data.shape[1]: | ||||
|             raise ValueError(Errors.E094.format(line_num=i, loc=vectors_loc) | ||||
|             raise ValueError(Errors.E094.format(line_num=i, loc=vectors_loc)) | ||||
|         vectors_data[i] = numpy.asarray(pieces, dtype='f') | ||||
|         vectors_keys.append(word) | ||||
|     return vectors_data, vectors_keys | ||||
|  |  | |||
|  | @ -533,8 +533,10 @@ p | |||
|         +cell option | ||||
|         +cell | ||||
|             |  Optional location of vectors file. Should be a tab-separated | ||||
|             |  file where the first column contains the word and the remaining | ||||
|             |  columns the values. | ||||
|             |  file in Word2Vec format where the first column contains the word | ||||
|             |  and the remaining columns the values. The file can be provided | ||||
|             |  as plain text (#[code .txt]) or as a compressed archive | ||||
|             |  (#[code .zip] or #[code .tar.gz]). | ||||
| 
 | ||||
|     +row | ||||
|         +cell #[code --prune-vectors], #[code -V] | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user