mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Fix JSON loading for Python 3.
This commit is contained in:
parent
2e92c6fb3a
commit
f5fe4f595b
|
@ -10,10 +10,7 @@ from os import path
|
|||
|
||||
from libc.string cimport memset
|
||||
|
||||
try:
|
||||
import ujson as json
|
||||
except ImportError:
|
||||
import json
|
||||
|
||||
from .syntax import nonproj
|
||||
|
||||
|
@ -149,7 +146,7 @@ def read_json_file(loc, docs_filter=None):
|
|||
for filename in os.listdir(loc):
|
||||
yield from read_json_file(path.join(loc, filename))
|
||||
else:
|
||||
with open(loc) as file_:
|
||||
with io.open(loc, 'r', encoding='utf8') as file_:
|
||||
docs = json.load(file_)
|
||||
for doc in docs:
|
||||
if docs_filter is not None and not docs_filter(doc):
|
||||
|
|
|
@ -2,10 +2,7 @@ from __future__ import unicode_literals, print_function
|
|||
import codecs
|
||||
import pathlib
|
||||
|
||||
try:
|
||||
import ujson as json
|
||||
except ImportError:
|
||||
import json
|
||||
|
||||
from .parts_of_speech import NOUN, VERB, ADJ, PUNCT
|
||||
|
||||
|
|
|
@ -57,24 +57,24 @@ cdef class Vocab:
|
|||
"vectors argument to Vocab.load() deprecated. "
|
||||
"Install vectors after loading.")
|
||||
if tag_map is True and (path / 'vocab' / 'tag_map.json').exists():
|
||||
with (path / 'vocab' / 'tag_map.json').open() as file_:
|
||||
with (path / 'vocab' / 'tag_map.json').open('r', encoding='utf8') as file_:
|
||||
tag_map = json.load(file_)
|
||||
if lex_attr_getters is not None \
|
||||
and oov_prob is True \
|
||||
and (path / 'vocab' / 'oov_prob').exists():
|
||||
with (path / 'vocab' / 'oov_prob').open() as file_:
|
||||
with (path / 'vocab' / 'oov_prob').open('r', encoding='utf8') as file_:
|
||||
oov_prob = float(file_.read())
|
||||
lex_attr_getters[PROB] = lambda text: oov_prob
|
||||
if lemmatizer is True:
|
||||
lemmatizer = Lemmatizer.load(path)
|
||||
if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists():
|
||||
with (path / 'vocab' / 'serializer.json').open() as file_:
|
||||
with (path / 'vocab' / 'serializer.json').open('r', encoding='utf8') as file_:
|
||||
serializer_freqs = json.load(file_)
|
||||
|
||||
cdef Vocab self = cls(lex_attr_getters=lex_attr_getters, tag_map=tag_map,
|
||||
lemmatizer=lemmatizer, serializer_freqs=serializer_freqs)
|
||||
|
||||
with (path / 'vocab' / 'strings.json').open() as file_:
|
||||
with (path / 'vocab' / 'strings.json').open('r', encoding='utf8') as file_:
|
||||
self.strings.load(file_)
|
||||
self.load_lexemes(path / 'vocab' / 'lexemes.bin')
|
||||
return self
|
||||
|
|
Loading…
Reference in New Issue
Block a user