Mirror of https://github.com/explosion/spaCy.git, synced 2025-10-30 23:47:31 +03:00
	Merge branch 'master' of https://github.com/explosion/spaCy
This commit is contained in:
commit 6d301fa06f
setup.py

@@ -183,7 +183,7 @@ def setup_package():
             name=about['__title__'],
             zip_safe=False,
             packages=PACKAGES,
-            package_data={'': ['*.pyx', '*.pxd', '*.txt', '*.tokens', 'data']},
+            package_data={'': ['*.pyx', '*.pxd', '*.txt', '*.tokens']},
             description=about['__summary__'],
             long_description=readme,
             author=about['__author__'],
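The change drops the stray 'data' entry from the package_data globs, so only the listed file extensions get bundled. As a reminder of how these patterns behave, a minimal, hypothetical setup.py sketch (the project name and layout are illustrative, not spaCy's real configuration):

from setuptools import setup, find_packages

setup(
    name='example_pkg',                    # hypothetical project name
    version='0.1.0',
    packages=find_packages(),
    zip_safe=False,
    # The '' key applies these glob patterns to every listed package, so
    # any matching non-Python files ship inside the built distribution.
    package_data={'': ['*.pyx', '*.pxd', '*.txt', '*.tokens']},
)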
spacy/about.py

@@ -4,7 +4,7 @@
 # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py

 __title__ = 'spacy'
-__version__ = '1.0.4'
+__version__ = '1.0.5'
 __summary__ = 'Industrial-strength NLP'
 __uri__ = 'https://spacy.io'
 __author__ = 'Matthew Honnibal'
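The version bump is the whole release; the file follows the warehouse __about__.py pattern linked above, so package metadata lives in one place. A hedged sketch of how a build script can consume it without importing the package (the path literal is an assumption about the repo layout):

about = {}
with open('spacy/about.py') as file_:   # path assumed for illustration
    exec(file_.read(), about)

print(about['__title__'], about['__version__'])   # -> spacy 1.0.5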
spacy/gold.pyx

@@ -10,10 +10,7 @@ from os import path

 from libc.string cimport memset

-try:
-    import ujson as json
-except ImportError:
-    import json
+import ujson as json

 from .syntax import nonproj

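Dropping the try/except fallback turns ujson from an optional speedup into a hard dependency. That is safe because ujson is a drop-in replacement for the stdlib json module for the calls used here; a small self-contained illustration with made-up data:

import ujson as json   # hard requirement, matching the change above

doc = {'text': 'Häuser', 'lemma': 'haus'}          # made-up example record
payload = json.dumps(doc)
assert json.loads(payload)['lemma'] == 'haus'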
@@ -149,7 +146,7 @@ def read_json_file(loc, docs_filter=None):
         for filename in os.listdir(loc):
             yield from read_json_file(path.join(loc, filename))
     else:
-        with open(loc) as file_:
+        with io.open(loc, 'r', encoding='utf8') as file_:
             docs = json.load(file_)
         for doc in docs:
             if docs_filter is not None and not docs_filter(doc):
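The switch from the builtin open() to io.open() pins the decoding to UTF-8 instead of the platform's locale default, and behaves the same on Python 2 and 3. A minimal sketch of the resulting read path, assuming a UTF-8 encoded JSON file:

import io
import ujson as json

def read_docs(loc):
    # loc is assumed to name a UTF-8 encoded JSON file of training docs.
    with io.open(loc, 'r', encoding='utf8') as file_:
        return json.load(file_)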
spacy/lemmatizer.py

@@ -2,10 +2,7 @@ from __future__ import unicode_literals, print_function
 import codecs
 import pathlib

-try:
-    import ujson as json
-except ImportError:
-    import json
+import ujson as json

 from .parts_of_speech import NOUN, VERB, ADJ, PUNCT

@@ -28,7 +25,7 @@ class Lemmatizer(object):
                     exc[pos] = read_exc(file_)
             else:
                 exc[pos] = {}
-        with (path / 'vocab' / 'lemma_rules.json').open('rb') as file_:
+        with (path / 'vocab' / 'lemma_rules.json').open('r', encoding='utf8') as file_:
             rules = json.load(file_)
         return cls(index, exc, rules)
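Note the mode change as well: the file was previously opened in binary ('rb'), leaving the decoding to the JSON parser; text mode with an explicit encoding makes the decoding step explicit and uniform across Python versions and JSON implementations. A sketch of the same idiom with pathlib, using a hypothetical model directory:

import pathlib
import ujson as json

path = pathlib.Path('models/en')                 # hypothetical location
with (path / 'vocab' / 'lemma_rules.json').open('r', encoding='utf8') as file_:
    rules = json.load(file_)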
spacy/matcher.pyx

@@ -175,7 +175,7 @@ cdef class Matcher:
     @classmethod
     def load(cls, path, vocab):
         if (path / 'gazetteer.json').exists():
-            with (path / 'gazetteer.json').open('rb') as file_:
+            with (path / 'gazetteer.json').open('r', encoding='utf8') as file_:
                 patterns = json.load(file_)
         else:
             patterns = {}
spacy/vocab.pyx

@@ -57,24 +57,24 @@ cdef class Vocab:
                 "vectors argument to Vocab.load() deprecated. "
                 "Install vectors after loading.")
         if tag_map is True and (path / 'vocab' / 'tag_map.json').exists():
-            with (path / 'vocab' / 'tag_map.json').open() as file_:
+            with (path / 'vocab' / 'tag_map.json').open('r', encoding='utf8') as file_:
                 tag_map = json.load(file_)
         if lex_attr_getters is not None \
         and oov_prob is True \
         and (path / 'vocab' / 'oov_prob').exists():
-            with (path / 'vocab' / 'oov_prob').open() as file_:
+            with (path / 'vocab' / 'oov_prob').open('r', encoding='utf8') as file_:
                 oov_prob = float(file_.read())
             lex_attr_getters[PROB] = lambda text: oov_prob
         if lemmatizer is True:
             lemmatizer = Lemmatizer.load(path)
         if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists():
-            with (path / 'vocab' / 'serializer.json').open() as file_:
+            with (path / 'vocab' / 'serializer.json').open('r', encoding='utf8') as file_:
                 serializer_freqs = json.load(file_)

         cdef Vocab self = cls(lex_attr_getters=lex_attr_getters, tag_map=tag_map,
                               lemmatizer=lemmatizer, serializer_freqs=serializer_freqs)

-        with (path / 'vocab' / 'strings.json').open() as file_:
+        with (path / 'vocab' / 'strings.json').open('r', encoding='utf8') as file_:
             self.strings.load(file_)
         self.load_lexemes(path / 'vocab' / 'lexemes.bin')
         return self
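Vocab.load now repeats the same open-and-parse idiom four times. A hypothetical helper, not part of this commit, that would centralise the pattern:

import ujson as json

def load_json(location):
    # location is a pathlib.Path; returns the parsed JSON payload.
    with location.open('r', encoding='utf8') as file_:
        return json.load(file_)

# Usage sketch: tag_map = load_json(path / 'vocab' / 'tag_map.json')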