caught more codecs.open -> io.open

alvations 2015-09-30 20:20:09 +02:00
parent 764bdc62e7
commit 8caedba42a
8 changed files with 17 additions and 17 deletions
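The change is mechanical: each codecs.open(loc, mode, 'utf8') call becomes io.open(loc, mode, encoding='utf8'). A minimal sketch of the pattern, with an illustrative file name (io.open is the same function as the built-in open() on Python 3 and returns a TextIOWrapper, while codecs.open returns a StreamReaderWriter and takes the encoding positionally):

    import io

    # Before: codecs.open passed the encoding as the third positional argument.
    #   import codecs
    #   with codecs.open('example.txt', 'r', 'utf8') as file_:
    #       text = file_.read()

    # After: io.open takes the encoding as an explicit keyword argument.
    with io.open('example.txt', 'r', encoding='utf8') as file_:
        text = file_.read()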

View File

@@ -20,7 +20,7 @@ from pathlib import Path
 from shutil import copyfile
 from shutil import copytree
-import codecs
+import io
 from spacy.en import get_lex_props
 from spacy.vocab import Vocab
@@ -41,7 +41,7 @@ def setup_tokenizer(lang_data_dir, tok_dir):
 def _read_clusters(loc):
     clusters = {}
-    for line in codecs.open(str(loc), 'r', 'utf8'):
+    for line in io.open(str(loc), 'r', encoding='utf8'):
         try:
             cluster, word, freq = line.split()
         except ValueError:
@@ -65,7 +65,7 @@ def _read_clusters(loc):
 def _read_probs(loc):
     probs = {}
-    for i, line in enumerate(codecs.open(str(loc), 'r', 'utf8')):
+    for i, line in enumerate(io.open(str(loc), 'r', encoding='utf8')):
         prob, word = line.split()
         prob = float(prob)
         probs[word] = prob

View File

@@ -1,11 +1,11 @@
-import codecs
+import io
 import plac
 from spacy.en import English
 def main(text_loc):
-    with codecs.open(text_loc, 'r', 'utf8') as file_:
+    with io.open(text_loc, 'r', encoding='utf8') as file_:
         text = file_.read()
     NLU = English()
     for paragraph in text.split('\n\n'):

View File

@@ -27,7 +27,7 @@ import json
 from os import path
 import os
 import re
-import codecs
+import io
 from collections import defaultdict
 from spacy.munge import read_ptb
@@ -122,7 +122,7 @@ def read_file(*pieces):
     if not path.exists(loc):
         return None
     else:
-        return codecs.open(loc, 'r', 'utf8').read().strip()
+        return io.open(loc, 'r', encoding='utf8').read().strip()
 def get_file_names(section_dir, subsection):

View File

@@ -1,6 +1,6 @@
 from __future__ import unicode_literals
 from os import path
-import codecs
+import io
 NOUN_RULES = (
@@ -85,7 +85,7 @@ def lemmatize(string, index, exceptions, rules):
 def read_index(loc):
     index = set()
-    for line in codecs.open(loc, 'r', 'utf8'):
+    for line in io.open(loc, 'r', encoding='utf8'):
         if line.startswith(' '):
             continue
         pieces = line.split()
@@ -97,7 +97,7 @@ def read_index(loc):
 def read_exc(loc):
     exceptions = {}
-    for line in codecs.open(loc, 'r', 'utf8'):
+    for line in io.open(loc, 'r', encoding='utf8'):
         if line.startswith(' '):
             continue
         pieces = line.split()

View File

@@ -1,5 +1,5 @@
 import numpy
-import codecs
+import io
 import json
 import ujson
 import random

View File

@@ -1,4 +1,4 @@
-import codecs
+import io
 from libc.string cimport memcpy
 from murmurhash.mrmr cimport hash64
@@ -112,11 +112,11 @@ cdef class StringStore:
             string = &self.strings[i]
             py_string = string.chars[:string.length]
             strings.append(py_string.decode('utf8'))
-        with codecs.open(loc, 'w', 'utf8') as file_:
+        with io.open(loc, 'w', encoding='utf8') as file_:
             file_.write(SEPARATOR.join(strings))

     def load(self, loc):
-        with codecs.open(loc, 'r', 'utf8') as file_:
+        with io.open(loc, 'r', encoding='utf8') as file_:
             strings = file_.read().split(SEPARATOR)
         cdef unicode string
         cdef bytes byte_string

View File

@@ -4,7 +4,7 @@ from libc.stdint cimport int32_t
 import bz2
 from os import path
-import codecs
+import io
 import math
 from .lexeme cimport EMPTY_LEXEME

View File

@@ -1,6 +1,6 @@
 from __future__ import unicode_literals
 from os import path
-import codecs
+import io
 from spacy.en import English
@@ -9,7 +9,7 @@ import pytest
 @pytest.fixture
 def sun_text():
-    with codecs.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', 'utf8') as file_:
+    with io.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', encoding='utf8') as file_:
         text = file_.read()
     return text