mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Fix json loading, for Python 3.
This commit is contained in:
		
							parent
							
								
									2e92c6fb3a
								
							
						
					
					
						commit
						f5fe4f595b
					
				| 
						 | 
					@ -10,10 +10,7 @@ from os import path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from libc.string cimport memset
 | 
					from libc.string cimport memset
 | 
				
			||||||
 | 
					
 | 
				
			||||||
try:
 | 
					import ujson as json
 | 
				
			||||||
    import ujson as json
 | 
					 | 
				
			||||||
except ImportError:
 | 
					 | 
				
			||||||
    import json
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .syntax import nonproj
 | 
					from .syntax import nonproj
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -149,7 +146,7 @@ def read_json_file(loc, docs_filter=None):
 | 
				
			||||||
        for filename in os.listdir(loc):
 | 
					        for filename in os.listdir(loc):
 | 
				
			||||||
            yield from read_json_file(path.join(loc, filename))
 | 
					            yield from read_json_file(path.join(loc, filename))
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        with open(loc) as file_:
 | 
					        with io.open(loc, 'r', encoding='utf8') as file_:
 | 
				
			||||||
            docs = json.load(file_)
 | 
					            docs = json.load(file_)
 | 
				
			||||||
        for doc in docs:
 | 
					        for doc in docs:
 | 
				
			||||||
            if docs_filter is not None and not docs_filter(doc):
 | 
					            if docs_filter is not None and not docs_filter(doc):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2,10 +2,7 @@ from __future__ import unicode_literals, print_function
 | 
				
			||||||
import codecs
 | 
					import codecs
 | 
				
			||||||
import pathlib
 | 
					import pathlib
 | 
				
			||||||
 | 
					
 | 
				
			||||||
try:
 | 
					import ujson as json
 | 
				
			||||||
    import ujson as json
 | 
					 | 
				
			||||||
except ImportError:
 | 
					 | 
				
			||||||
    import json
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .parts_of_speech import NOUN, VERB, ADJ, PUNCT
 | 
					from .parts_of_speech import NOUN, VERB, ADJ, PUNCT
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -57,24 +57,24 @@ cdef class Vocab:
 | 
				
			||||||
                "vectors argument to Vocab.load() deprecated. "
 | 
					                "vectors argument to Vocab.load() deprecated. "
 | 
				
			||||||
                "Install vectors after loading.")
 | 
					                "Install vectors after loading.")
 | 
				
			||||||
        if tag_map is True and (path / 'vocab' / 'tag_map.json').exists():
 | 
					        if tag_map is True and (path / 'vocab' / 'tag_map.json').exists():
 | 
				
			||||||
            with (path / 'vocab' / 'tag_map.json').open() as file_:
 | 
					            with (path / 'vocab' / 'tag_map.json').open('r', encoding='utf8') as file_:
 | 
				
			||||||
                tag_map = json.load(file_)
 | 
					                tag_map = json.load(file_)
 | 
				
			||||||
        if lex_attr_getters is not None \
 | 
					        if lex_attr_getters is not None \
 | 
				
			||||||
        and oov_prob is True \
 | 
					        and oov_prob is True \
 | 
				
			||||||
        and (path / 'vocab' / 'oov_prob').exists():
 | 
					        and (path / 'vocab' / 'oov_prob').exists():
 | 
				
			||||||
            with (path / 'vocab' / 'oov_prob').open() as file_:
 | 
					            with (path / 'vocab' / 'oov_prob').open('r', encoding='utf8') as file_:
 | 
				
			||||||
                oov_prob = float(file_.read())
 | 
					                oov_prob = float(file_.read())
 | 
				
			||||||
            lex_attr_getters[PROB] = lambda text: oov_prob
 | 
					            lex_attr_getters[PROB] = lambda text: oov_prob
 | 
				
			||||||
        if lemmatizer is True:
 | 
					        if lemmatizer is True:
 | 
				
			||||||
            lemmatizer = Lemmatizer.load(path)
 | 
					            lemmatizer = Lemmatizer.load(path)
 | 
				
			||||||
        if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists():
 | 
					        if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists():
 | 
				
			||||||
            with (path / 'vocab' / 'serializer.json').open() as file_:
 | 
					            with (path / 'vocab' / 'serializer.json').open('r', encoding='utf8') as file_:
 | 
				
			||||||
                serializer_freqs = json.load(file_)
 | 
					                serializer_freqs = json.load(file_)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        cdef Vocab self = cls(lex_attr_getters=lex_attr_getters, tag_map=tag_map,
 | 
					        cdef Vocab self = cls(lex_attr_getters=lex_attr_getters, tag_map=tag_map,
 | 
				
			||||||
                              lemmatizer=lemmatizer, serializer_freqs=serializer_freqs)
 | 
					                              lemmatizer=lemmatizer, serializer_freqs=serializer_freqs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with (path / 'vocab' / 'strings.json').open() as file_:
 | 
					        with (path / 'vocab' / 'strings.json').open('r', encoding='utf8') as file_:
 | 
				
			||||||
            self.strings.load(file_)
 | 
					            self.strings.load(file_)
 | 
				
			||||||
        self.load_lexemes(path / 'vocab' / 'lexemes.bin')
 | 
					        self.load_lexemes(path / 'vocab' / 'lexemes.bin')
 | 
				
			||||||
        return self
 | 
					        return self
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user