https://github.com/explosion/spaCy.git

Fix json imports and use ujson

commit e1efd589c3
parent 958b12dec8
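The change is the same across every file touched: the guarded import that bound ujson (or the stdlib json module as a fallback) to the name `json` is replaced by a plain `import ujson`, and the JSON reads on the affected paths call `ujson.load` directly. A minimal sketch of the before/after import pattern, extracted from the hunks below (illustrative, not the committed code):

    # Before: optional ujson, falling back to the stdlib under the name `json`
    try:
        import ujson as json
    except ImportError:
        import json

    # After: ujson is imported unconditionally under its own name,
    # making it a hard dependency but removing the aliasing
    import ujson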
spacy/gold.pyx
@@ -145,7 +145,7 @@ def read_json_file(loc, docs_filter=None):
             yield from read_json_file(loc / filename)
     else:
         with io.open(loc, 'r', encoding='utf8') as file_:
-            docs = json.load(file_)
+            docs = ujson.load(file_)
         for doc in docs:
             if docs_filter is not None and not docs_filter(doc):
                 continue
spacy/matcher.pyx
@@ -3,6 +3,8 @@
 # coding: utf8
 from __future__ import unicode_literals
 
+import ujson
+
 from .typedefs cimport attr_t
 from .typedefs cimport hash_t
 from .attrs cimport attr_id_t
@@ -53,12 +55,6 @@ from .attrs import FLAG36 as L9_ENT
 from .attrs import FLAG35 as L10_ENT
 
 
-try:
-    import ujson as json
-except ImportError:
-    import json
-
-
 cpdef enum quantifier_t:
     _META
     ONE
@@ -194,7 +190,7 @@ cdef class Matcher:
         """
        if (path / 'gazetteer.json').exists():
             with (path / 'gazetteer.json').open('r', encoding='utf8') as file_:
-                patterns = json.load(file_)
+                patterns = ujson.load(file_)
         else:
             patterns = {}
         return cls(vocab, patterns)
spacy/tagger.pyx
@@ -1,7 +1,7 @@
-import json
 # coding: utf8
 from __future__ import unicode_literals
 
+import ujson
 from collections import defaultdict
 
 from cymem.cymem cimport Pool
@@ -131,7 +131,7 @@ cdef class Tagger:
         path = util.ensure_path(path)
         if (path / 'templates.json').exists():
             with (path / 'templates.json').open('r', encoding='utf8') as file_:
-                templates = json.load(file_)
+                templates = ujson.load(file_)
         elif require:
             raise IOError(
                 "Required file %s/templates.json not found when loading Tagger" % str(path))
spacy/tokenizer.pyx
@@ -2,16 +2,10 @@
 # coding: utf8
 from __future__ import unicode_literals
 
+import ujson
+
 from cython.operator cimport dereference as deref
 from cython.operator cimport preincrement as preinc
 
-try:
-    import ujson as json
-except ImportError:
-    import json
-
-
 from cymem.cymem cimport Pool
 from preshed.maps cimport PreshMap
 
@@ -52,7 +46,7 @@ cdef class Tokenizer:
         path = util.ensure_path(path)
         if rules is None:
             with (path / 'tokenizer' / 'specials.json').open('r', encoding='utf8') as file_:
-                rules = json.load(file_)
+                rules = ujson.load(file_)
         if prefix_search in (None, True):
             with (path / 'tokenizer' / 'prefix.txt').open() as file_:
                 entries = file_.read().split('\n')
spacy/util.py
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals, print_function
 
 import io
-import json
+import ujson
 import re
 from pathlib import Path
 import sys
@@ -117,7 +117,7 @@ def parse_package_meta(package_path, package, require=True):
     location = package_path / package / 'meta.json'
     if location.is_file():
         with io.open(location, encoding='utf8') as f:
-            meta = json.load(f)
+            meta = ujson.load(f)
         return meta
     elif require:
         raise IOError("Could not read meta.json from %s" % location)
spacy/vocab.pyx
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals
 
 import bz2
-import ujson as json
+import ujson
 import re
 
 from libc.string cimport memset
@@ -69,7 +69,7 @@ cdef class Vocab:
                 "Install vectors after loading.")
         if tag_map is True and (path / 'vocab' / 'tag_map.json').exists():
             with (path / 'vocab' / 'tag_map.json').open('r', encoding='utf8') as file_:
-                tag_map = json.load(file_)
+                tag_map = ujson.load(file_)
         elif tag_map is True:
             tag_map = None
         if lex_attr_getters is not None \
@@ -82,12 +82,12 @@ cdef class Vocab:
             lemmatizer = Lemmatizer.load(path)
         if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists():
             with (path / 'vocab' / 'serializer.json').open('r', encoding='utf8') as file_:
-                serializer_freqs = json.load(file_)
+                serializer_freqs = ujson.load(file_)
         else:
             serializer_freqs = None
 
         with (path / 'vocab' / 'strings.json').open('r', encoding='utf8') as file_:
-            strings_list = json.load(file_)
+            strings_list = ujson.load(file_)
         cdef Vocab self = cls(lex_attr_getters=lex_attr_getters, tag_map=tag_map,
                               lemmatizer=lemmatizer, serializer_freqs=serializer_freqs,
                               strings=strings_list)
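For the UTF-8 JSON files read above, `ujson.load` takes a file-like object just like the stdlib's `json.load`, so each hunk is a rename at the call site. A minimal usage sketch of that call shape, assuming a hypothetical meta.json on disk (the path is illustrative, not from the commit):

    import io
    import ujson

    # Parses a JSON file the same way json.load(f) would
    with io.open('meta.json', 'r', encoding='utf8') as f:
        meta = ujson.load(f)

One consequence worth noting: with the try/except ImportError fallback removed from matcher.pyx and tokenizer.pyx, ujson becomes a hard runtime dependency rather than an optional speedup.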