Fix json imports and use ujson

This commit is contained in:
ines 2017-04-15 12:13:34 +02:00
parent 958b12dec8
commit e1efd589c3
6 changed files with 14 additions and 24 deletions

View File

@ -145,7 +145,7 @@ def read_json_file(loc, docs_filter=None):
yield from read_json_file(loc / filename) yield from read_json_file(loc / filename)
else: else:
with io.open(loc, 'r', encoding='utf8') as file_: with io.open(loc, 'r', encoding='utf8') as file_:
docs = json.load(file_) docs = ujson.load(file_)
for doc in docs: for doc in docs:
if docs_filter is not None and not docs_filter(doc): if docs_filter is not None and not docs_filter(doc):
continue continue

View File

@ -3,6 +3,8 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
import ujson
from .typedefs cimport attr_t from .typedefs cimport attr_t
from .typedefs cimport hash_t from .typedefs cimport hash_t
from .attrs cimport attr_id_t from .attrs cimport attr_id_t
@ -53,12 +55,6 @@ from .attrs import FLAG36 as L9_ENT
from .attrs import FLAG35 as L10_ENT from .attrs import FLAG35 as L10_ENT
try:
import ujson as json
except ImportError:
import json
cpdef enum quantifier_t: cpdef enum quantifier_t:
_META _META
ONE ONE
@ -194,7 +190,7 @@ cdef class Matcher:
""" """
if (path / 'gazetteer.json').exists(): if (path / 'gazetteer.json').exists():
with (path / 'gazetteer.json').open('r', encoding='utf8') as file_: with (path / 'gazetteer.json').open('r', encoding='utf8') as file_:
patterns = json.load(file_) patterns = ujson.load(file_)
else: else:
patterns = {} patterns = {}
return cls(vocab, patterns) return cls(vocab, patterns)

View File

@ -1,7 +1,7 @@
import json
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
import ujson
from collections import defaultdict from collections import defaultdict
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
@ -131,7 +131,7 @@ cdef class Tagger:
path = util.ensure_path(path) path = util.ensure_path(path)
if (path / 'templates.json').exists(): if (path / 'templates.json').exists():
with (path / 'templates.json').open('r', encoding='utf8') as file_: with (path / 'templates.json').open('r', encoding='utf8') as file_:
templates = json.load(file_) templates = ujson.load(file_)
elif require: elif require:
raise IOError( raise IOError(
"Required file %s/templates.json not found when loading Tagger" % str(path)) "Required file %s/templates.json not found when loading Tagger" % str(path))

View File

@ -2,16 +2,10 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
import ujson
from cython.operator cimport dereference as deref from cython.operator cimport dereference as deref
from cython.operator cimport preincrement as preinc from cython.operator cimport preincrement as preinc
try:
import ujson as json
except ImportError:
import json
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
from preshed.maps cimport PreshMap from preshed.maps cimport PreshMap
@ -52,7 +46,7 @@ cdef class Tokenizer:
path = util.ensure_path(path) path = util.ensure_path(path)
if rules is None: if rules is None:
with (path / 'tokenizer' / 'specials.json').open('r', encoding='utf8') as file_: with (path / 'tokenizer' / 'specials.json').open('r', encoding='utf8') as file_:
rules = json.load(file_) rules = ujson.load(file_)
if prefix_search in (None, True): if prefix_search in (None, True):
with (path / 'tokenizer' / 'prefix.txt').open() as file_: with (path / 'tokenizer' / 'prefix.txt').open() as file_:
entries = file_.read().split('\n') entries = file_.read().split('\n')

View File

@ -2,7 +2,7 @@
from __future__ import unicode_literals, print_function from __future__ import unicode_literals, print_function
import io import io
import json import ujson
import re import re
from pathlib import Path from pathlib import Path
import sys import sys
@ -117,7 +117,7 @@ def parse_package_meta(package_path, package, require=True):
location = package_path / package / 'meta.json' location = package_path / package / 'meta.json'
if location.is_file(): if location.is_file():
with io.open(location, encoding='utf8') as f: with io.open(location, encoding='utf8') as f:
meta = json.load(f) meta = ujson.load(f)
return meta return meta
elif require: elif require:
raise IOError("Could not read meta.json from %s" % location) raise IOError("Could not read meta.json from %s" % location)

View File

@ -2,7 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import bz2 import bz2
import ujson as json import ujson
import re import re
from libc.string cimport memset from libc.string cimport memset
@ -69,7 +69,7 @@ cdef class Vocab:
"Install vectors after loading.") "Install vectors after loading.")
if tag_map is True and (path / 'vocab' / 'tag_map.json').exists(): if tag_map is True and (path / 'vocab' / 'tag_map.json').exists():
with (path / 'vocab' / 'tag_map.json').open('r', encoding='utf8') as file_: with (path / 'vocab' / 'tag_map.json').open('r', encoding='utf8') as file_:
tag_map = json.load(file_) tag_map = ujson.load(file_)
elif tag_map is True: elif tag_map is True:
tag_map = None tag_map = None
if lex_attr_getters is not None \ if lex_attr_getters is not None \
@ -82,12 +82,12 @@ cdef class Vocab:
lemmatizer = Lemmatizer.load(path) lemmatizer = Lemmatizer.load(path)
if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists(): if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists():
with (path / 'vocab' / 'serializer.json').open('r', encoding='utf8') as file_: with (path / 'vocab' / 'serializer.json').open('r', encoding='utf8') as file_:
serializer_freqs = json.load(file_) serializer_freqs = ujson.load(file_)
else: else:
serializer_freqs = None serializer_freqs = None
with (path / 'vocab' / 'strings.json').open('r', encoding='utf8') as file_: with (path / 'vocab' / 'strings.json').open('r', encoding='utf8') as file_:
strings_list = json.load(file_) strings_list = ujson.load(file_)
cdef Vocab self = cls(lex_attr_getters=lex_attr_getters, tag_map=tag_map, cdef Vocab self = cls(lex_attr_getters=lex_attr_getters, tag_map=tag_map,
lemmatizer=lemmatizer, serializer_freqs=serializer_freqs, lemmatizer=lemmatizer, serializer_freqs=serializer_freqs,
strings=strings_list) strings=strings_list)