mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
caught more codecs.open -> io.open
This commit is contained in:
parent
764bdc62e7
commit
8caedba42a
|
@ -20,7 +20,7 @@ from pathlib import Path
|
|||
|
||||
from shutil import copyfile
|
||||
from shutil import copytree
|
||||
import codecs
|
||||
import io
|
||||
|
||||
from spacy.en import get_lex_props
|
||||
from spacy.vocab import Vocab
|
||||
|
@ -41,7 +41,7 @@ def setup_tokenizer(lang_data_dir, tok_dir):
|
|||
|
||||
def _read_clusters(loc):
|
||||
clusters = {}
|
||||
for line in codecs.open(str(loc), 'r', 'utf8'):
|
||||
for line in io.open(str(loc), 'r', encoding='utf8'):
|
||||
try:
|
||||
cluster, word, freq = line.split()
|
||||
except ValueError:
|
||||
|
@ -65,7 +65,7 @@ def _read_clusters(loc):
|
|||
|
||||
def _read_probs(loc):
|
||||
probs = {}
|
||||
for i, line in enumerate(codecs.open(str(loc), 'r', 'utf8')):
|
||||
for i, line in enumerate(io.open(str(loc), 'r', encoding='utf8')):
|
||||
prob, word = line.split()
|
||||
prob = float(prob)
|
||||
probs[word] = prob
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
import codecs
|
||||
import io
|
||||
import plac
|
||||
|
||||
from spacy.en import English
|
||||
|
||||
|
||||
def main(text_loc):
|
||||
with codecs.open(text_loc, 'r', 'utf8') as file_:
|
||||
with io.open(text_loc, 'r', encoding='utf8') as file_:
|
||||
text = file_.read()
|
||||
NLU = English()
|
||||
for paragraph in text.split('\n\n'):
|
||||
|
|
|
@ -27,7 +27,7 @@ import json
|
|||
from os import path
|
||||
import os
|
||||
import re
|
||||
import codecs
|
||||
import io
|
||||
from collections import defaultdict
|
||||
|
||||
from spacy.munge import read_ptb
|
||||
|
@ -122,7 +122,7 @@ def read_file(*pieces):
|
|||
if not path.exists(loc):
|
||||
return None
|
||||
else:
|
||||
return codecs.open(loc, 'r', 'utf8').read().strip()
|
||||
return io.open(loc, 'r', encoding='utf8').read().strip()
|
||||
|
||||
|
||||
def get_file_names(section_dir, subsection):
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from __future__ import unicode_literals
|
||||
from os import path
|
||||
import codecs
|
||||
import io
|
||||
|
||||
|
||||
NOUN_RULES = (
|
||||
|
@ -85,7 +85,7 @@ def lemmatize(string, index, exceptions, rules):
|
|||
|
||||
def read_index(loc):
|
||||
index = set()
|
||||
for line in codecs.open(loc, 'r', 'utf8'):
|
||||
for line in io.open(loc, 'r', encoding='utf8'):
|
||||
if line.startswith(' '):
|
||||
continue
|
||||
pieces = line.split()
|
||||
|
@ -97,7 +97,7 @@ def read_index(loc):
|
|||
|
||||
def read_exc(loc):
|
||||
exceptions = {}
|
||||
for line in codecs.open(loc, 'r', 'utf8'):
|
||||
for line in io.open(loc, 'r', encoding='utf8'):
|
||||
if line.startswith(' '):
|
||||
continue
|
||||
pieces = line.split()
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import numpy
|
||||
import codecs
|
||||
import io
|
||||
import json
|
||||
import ujson
|
||||
import random
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import codecs
|
||||
import io
|
||||
|
||||
from libc.string cimport memcpy
|
||||
from murmurhash.mrmr cimport hash64
|
||||
|
@ -112,11 +112,11 @@ cdef class StringStore:
|
|||
string = &self.strings[i]
|
||||
py_string = string.chars[:string.length]
|
||||
strings.append(py_string.decode('utf8'))
|
||||
with codecs.open(loc, 'w', 'utf8') as file_:
|
||||
with io.open(loc, 'w', encoding='utf8') as file_:
|
||||
file_.write(SEPARATOR.join(strings))
|
||||
|
||||
def load(self, loc):
|
||||
with codecs.open(loc, 'r', 'utf8') as file_:
|
||||
with io.open(loc, 'r', encoding='utf8') as file_:
|
||||
strings = file_.read().split(SEPARATOR)
|
||||
cdef unicode string
|
||||
cdef bytes byte_string
|
||||
|
|
|
@ -4,7 +4,7 @@ from libc.stdint cimport int32_t
|
|||
|
||||
import bz2
|
||||
from os import path
|
||||
import codecs
|
||||
import io
|
||||
import math
|
||||
|
||||
from .lexeme cimport EMPTY_LEXEME
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from __future__ import unicode_literals
|
||||
from os import path
|
||||
import codecs
|
||||
import io
|
||||
|
||||
from spacy.en import English
|
||||
|
||||
|
@ -9,7 +9,7 @@ import pytest
|
|||
|
||||
@pytest.fixture
|
||||
def sun_text():
|
||||
with codecs.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', 'utf8') as file_:
|
||||
with io.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', encoding='utf8') as file_:
|
||||
text = file_.read()
|
||||
return text
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user