	Merge branch 'master' of https://github.com/explosion/spaCy

commit 6d301fa06f

 setup.py | 2 +-

@@ -183,7 +183,7 @@ def setup_package():
             name=about['__title__'],
             zip_safe=False,
             packages=PACKAGES,
-            package_data={'': ['*.pyx', '*.pxd', '*.txt', '*.tokens', 'data']},
+            package_data={'': ['*.pyx', '*.pxd', '*.txt', '*.tokens']},
             description=about['__summary__'],
             long_description=readme,
             author=about['__author__'],
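
This setup.py change drops the literal 'data' entry from package_data, presumably because model data in spaCy 1.x is downloaded separately rather than shipped inside the package. For reference, package_data maps package names to glob patterns resolved inside each package directory, and the '' key applies its patterns to every listed package. A minimal sketch of the mechanism, with hypothetical names:

from setuptools import setup

setup(
    name='example-pkg',        # hypothetical project name
    packages=['example_pkg'],  # hypothetical package
    # '' = apply these globs to every package above; each pattern is
    # matched against files inside the package directory.
    package_data={'': ['*.pyx', '*.pxd', '*.txt', '*.tokens']},
)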

@@ -4,7 +4,7 @@
 # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py

 __title__ = 'spacy'
-__version__ = '1.0.4'
+__version__ = '1.0.5'
 __summary__ = 'Industrial-strength NLP'
 __uri__ = 'https://spacy.io'
 __author__ = 'Matthew Honnibal'
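
The version bump to 1.0.5 lands in a standalone about module, following the warehouse pattern linked in the hunk: metadata is defined once, and setup.py reads it without importing the package (importing would require the compiled extensions to already exist). A hedged sketch of how a setup script typically consumes such a file; the exec-based loader here is an assumption, not necessarily what spaCy's setup.py does:

import io
import os

about = {}
here = os.path.dirname(os.path.abspath(__file__))
# Execute about.py in a plain dict instead of importing the package,
# so the metadata is available before any extension modules are built.
with io.open(os.path.join(here, 'spacy', 'about.py'), encoding='utf8') as f:
    exec(f.read(), about)

print(about['__title__'], about['__version__'])  # -> spacy 1.0.5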

@@ -10,10 +10,7 @@ from os import path

 from libc.string cimport memset

-try:
-    import ujson as json
-except ImportError:
-    import json
+import ujson as json

 from .syntax import nonproj

@@ -149,7 +146,7 @@ def read_json_file(loc, docs_filter=None):
         for filename in os.listdir(loc):
             yield from read_json_file(path.join(loc, filename))
     else:
-        with open(loc) as file_:
+        with io.open(loc, 'r', encoding='utf8') as file_:
             docs = json.load(file_)
         for doc in docs:
             if docs_filter is not None and not docs_filter(doc):
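
Two things change in this file: ujson becomes a hard dependency (the stdlib-json fallback is gone), and the training corpus is opened with io.open and an explicit utf-8 encoding instead of plain open(), whose default encoding is locale-dependent on Python 3 and which returns raw bytes on Python 2. The same pair of edits recurs in the lemmatizer, matcher, and vocab hunks below. A minimal sketch of the resulting pattern; load_json is a hypothetical helper, not spaCy API:

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import io
import ujson as json  # hard dependency now; no fallback to stdlib json

def load_json(loc):
    # io.open behaves identically on Python 2 and 3 and decodes the
    # file with the encoding we name, not whatever the locale says.
    with io.open(loc, 'r', encoding='utf8') as file_:
        return json.load(file_)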

@@ -2,10 +2,7 @@ from __future__ import unicode_literals, print_function
 import codecs
 import pathlib

-try:
-    import ujson as json
-except ImportError:
-    import json
+import ujson as json

 from .parts_of_speech import NOUN, VERB, ADJ, PUNCT

@@ -28,7 +25,7 @@ class Lemmatizer(object):
                     exc[pos] = read_exc(file_)
             else:
                 exc[pos] = {}
-        with (path / 'vocab' / 'lemma_rules.json').open('rb') as file_:
+        with (path / 'vocab' / 'lemma_rules.json').open('r', encoding='utf8') as file_:
             rules = json.load(file_)
         return cls(index, exc, rules)

@@ -175,7 +175,7 @@ cdef class Matcher:
     @classmethod
     def load(cls, path, vocab):
         if (path / 'gazetteer.json').exists():
-            with (path / 'gazetteer.json').open('rb') as file_:
+            with (path / 'gazetteer.json').open('r', encoding='utf8') as file_:
                 patterns = json.load(file_)
         else:
             patterns = {}
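
Matcher.load() keeps its graceful default: a missing gazetteer.json yields an empty pattern dict rather than an error, and the file is now read as utf-8 text instead of bytes. A small sketch generalizing that shape; load_optional_json is a hypothetical helper, and path is assumed to be a pathlib.Path:

import ujson as json

def load_optional_json(path, default):
    # Optional-resource pattern from Matcher.load(): fall back to a
    # default when the file is absent instead of raising.
    if path.exists():
        with path.open('r', encoding='utf8') as file_:
            return json.load(file_)
    return default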

@@ -57,24 +57,24 @@ cdef class Vocab:
                 "vectors argument to Vocab.load() deprecated. "
                 "Install vectors after loading.")
         if tag_map is True and (path / 'vocab' / 'tag_map.json').exists():
-            with (path / 'vocab' / 'tag_map.json').open() as file_:
+            with (path / 'vocab' / 'tag_map.json').open('r', encoding='utf8') as file_:
                 tag_map = json.load(file_)
         if lex_attr_getters is not None \
         and oov_prob is True \
         and (path / 'vocab' / 'oov_prob').exists():
-            with (path / 'vocab' / 'oov_prob').open() as file_:
+            with (path / 'vocab' / 'oov_prob').open('r', encoding='utf8') as file_:
                 oov_prob = float(file_.read())
             lex_attr_getters[PROB] = lambda text: oov_prob
         if lemmatizer is True:
             lemmatizer = Lemmatizer.load(path)
         if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists():
-            with (path / 'vocab' / 'serializer.json').open() as file_:
+            with (path / 'vocab' / 'serializer.json').open('r', encoding='utf8') as file_:
                 serializer_freqs = json.load(file_)

         cdef Vocab self = cls(lex_attr_getters=lex_attr_getters, tag_map=tag_map,
                               lemmatizer=lemmatizer, serializer_freqs=serializer_freqs)

-        with (path / 'vocab' / 'strings.json').open() as file_:
+        with (path / 'vocab' / 'strings.json').open('r', encoding='utf8') as file_:
             self.strings.load(file_)
         self.load_lexemes(path / 'vocab' / 'lexemes.bin')
         return self
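
Vocab.load() treats True as a sentinel meaning "load the default for this component from the data directory", and after this commit every such read goes through an explicit utf-8 text handle. A hedged sketch of the two conventions; both helpers are hypothetical, not spaCy API:

import io

def load_component(value, path, loader):
    # Sentinel convention from Vocab.load(): True means "load the
    # default from disk"; any other value is passed through unchanged.
    if value is True:
        return loader(path)
    return value

def read_oov_prob(loc):
    # The same explicit utf-8 text read the hunk switches every
    # resource to; oov_prob is stored as a bare float in a text file.
    with io.open(loc, 'r', encoding='utf8') as file_:
        return float(file_.read())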