mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Update adding languages docs
This commit is contained in:
		
							parent
							
								
									3523715d52
								
							
						
					
					
						commit
						a433e5012a
					
				| 
						 | 
					@ -436,6 +436,8 @@ p
 | 
				
			||||||
 | 
					
 | 
				
			||||||
+h(3, "morph-rules") Morph rules
 | 
					+h(3, "morph-rules") Morph rules
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//- TODO: write morph rules section
 | 
				
			||||||
 | 
					
 | 
				
			||||||
+h(2, "testing") Testing the new language tokenizer
 | 
					+h(2, "testing") Testing the new language tokenizer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
p
 | 
					p
 | 
				
			||||||
| 
						 | 
					@ -626,37 +628,20 @@ p
 | 
				
			||||||
    |  trains the model using #[+a("https://radimrehurek.com/gensim/") Gensim].
 | 
					    |  trains the model using #[+a("https://radimrehurek.com/gensim/") Gensim].
 | 
				
			||||||
    |  The #[code vectors.bin] file should consist of one word and vector per line.
 | 
					    |  The #[code vectors.bin] file should consist of one word and vector per line.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
+h(2, "model-directory") Setting up a model directory
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
p
 | 
					 | 
				
			||||||
    |  Once you've collected the word frequencies, Brown clusters and word
 | 
					 | 
				
			||||||
    |  vectors files, you can use the
 | 
					 | 
				
			||||||
    |  #[+a("/docs/usage/cli#model") #[code model] command] to create a data
 | 
					 | 
				
			||||||
    |  directory:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
+code(false, "bash").
 | 
					 | 
				
			||||||
    python -m spacy model [lang] [model_dir] [freqs_data] [clusters_data] [vectors_data]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
+aside-code("your_data_directory", "yaml").
 | 
					+aside-code("your_data_directory", "yaml").
 | 
				
			||||||
    ├── vocab/
 | 
					    ├── vocab/
 | 
				
			||||||
    |   ├── lexemes.bin   # via nlp.vocab.dump(path)
 | 
					    |   ├── lexemes.bin
 | 
				
			||||||
    |   ├── strings.json  # via nlp.vocab.strings.dump(file_)
 | 
					    |   ├── strings.json
 | 
				
			||||||
    |   └── oov_prob      # optional
 | 
					    |   └── oov_prob
 | 
				
			||||||
    ├── pos/              # optional
 | 
					    ├── pos/
 | 
				
			||||||
    |   ├── model         # via nlp.tagger.model.dump(path)
 | 
					    |   ├── model
 | 
				
			||||||
    |   └── config.json   # via Language.train
 | 
					    |   └── config.json
 | 
				
			||||||
    ├── deps/             # optional
 | 
					    ├── deps/
 | 
				
			||||||
    |   ├── model         # via nlp.parser.model.dump(path)
 | 
					    |   ├── model
 | 
				
			||||||
    |   └── config.json   # via Language.train
 | 
					    |   └── config.json
 | 
				
			||||||
    └── ner/              # optional
 | 
					    └── ner/
 | 
				
			||||||
        ├── model         # via nlp.entity.model.dump(path)
 | 
					        ├── model
 | 
				
			||||||
        └── config.json   # via Language.train
 | 
					        └── config.json
 | 
				
			||||||
 | 
					 | 
				
			||||||
p
 | 
					 | 
				
			||||||
    |  This creates a spaCy data directory with a vocabulary model, ready to be
 | 
					 | 
				
			||||||
    |  loaded. By default, the command expects to be able to find your language
 | 
					 | 
				
			||||||
    |  class using #[code spacy.util.get_lang_class(lang_id)].
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
+h(2, "train-tagger-parser") Training the tagger and parser
 | 
					+h(2, "train-tagger-parser") Training the tagger and parser
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user