mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	* Update get_freqs.py script
This commit is contained in:
		
							parent
							
								
									5ff4454177
								
							
						
					
					
						commit
						17fffb4c57
					
				|  | @ -12,7 +12,7 @@ import codecs | |||
| from preshed.counter import PreshCounter | ||||
| from joblib import Parallel, delayed | ||||
| 
 | ||||
| import spacy.en | ||||
| from spacy.en import English | ||||
| from spacy.strings import StringStore | ||||
| from spacy.attrs import ORTH | ||||
| from spacy.tokenizer import Tokenizer | ||||
|  | @ -27,8 +27,9 @@ def iter_comments(loc): | |||
| 
 | ||||
| def count_freqs(input_loc, output_loc): | ||||
|     print(output_loc) | ||||
|     tokenizer = Tokenizer.from_dir(Vocab(), | ||||
|                     path.join(spacy.en.English.default_data_dir(), 'tokenizer')) | ||||
|     vocab = English.default_vocab(get_lex_attr=None) | ||||
|     tokenizer = Tokenizer.from_dir(vocab, | ||||
|                     path.join(English.default_data_dir(), 'tokenizer')) | ||||
| 
 | ||||
|     counts = PreshCounter() | ||||
|     for json_comment in iter_comments(input_loc): | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user