caught more codecs.open -> io.open

2025-08-04 04:10:20 +03:00 · 2015-09-30 20:20:09 +02:00 · 2015-09-30 20:20:09 +02:00 · 8caedba42a
commit 8caedba42a
parent 764bdc62e7
8 changed files with 17 additions and 17 deletions
--- a/bin/init_model.py
+++ b/bin/init_model.py
@@ -20,7 +20,7 @@ from pathlib import Path

 from shutil import copyfile
 from shutil import copytree
-import codecs
+import io

 from spacy.en import get_lex_props
 from spacy.vocab import Vocab
@@ -41,7 +41,7 @@ def setup_tokenizer(lang_data_dir, tok_dir):

 def _read_clusters(loc):
    clusters = {}
-    for line in codecs.open(str(loc), 'r', 'utf8'):
+    for line in io.open(str(loc), 'r', encoding='utf8'):
        try:
            cluster, word, freq = line.split()
        except ValueError:
@@ -65,7 +65,7 @@ def _read_clusters(loc):

 def _read_probs(loc):
    probs = {}
-    for i, line in enumerate(codecs.open(str(loc), 'r', 'utf8')):
+    for i, line in enumerate(io.open(str(loc), 'r', encoding='utf8')):
        prob, word = line.split()
        prob = float(prob)
        probs[word] = prob
--- a/bin/ner_tag.py
+++ b/bin/ner_tag.py
@@ -1,11 +1,11 @@
-import codecs
+import io
 import plac

 from spacy.en import English


 def main(text_loc):
-    with codecs.open(text_loc, 'r', 'utf8') as file_:
+    with io.open(text_loc, 'r', encoding='utf8') as file_:
        text = file_.read()
    NLU = English()
    for paragraph in text.split('\n\n'):
--- a/bin/prepare_treebank.py
+++ b/bin/prepare_treebank.py
@@ -27,7 +27,7 @@ import json
 from os import path
 import os
 import re
-import codecs
+import io
 from collections import defaultdict

 from spacy.munge import read_ptb
@@ -122,7 +122,7 @@ def read_file(*pieces):
    if not path.exists(loc):
        return None
    else:
-        return codecs.open(loc, 'r', 'utf8').read().strip()
+        return io.open(loc, 'r', encoding='utf8').read().strip()


 def get_file_names(section_dir, subsection):
--- a/spacy/en/lemmatizer.py
+++ b/spacy/en/lemmatizer.py
@@ -1,6 +1,6 @@
 from __future__ import unicode_literals
 from os import path
-import codecs
+import io


 NOUN_RULES = (
@@ -85,7 +85,7 @@ def lemmatize(string, index, exceptions, rules):

 def read_index(loc):
    index = set()
-    for line in codecs.open(loc, 'r', 'utf8'):
+    for line in io.open(loc, 'r', encoding='utf8'):
        if line.startswith(' '):
            continue
        pieces = line.split()
@@ -97,7 +97,7 @@ def read_index(loc):

 def read_exc(loc):
    exceptions = {}
-    for line in codecs.open(loc, 'r', 'utf8'):
+    for line in io.open(loc, 'r', encoding='utf8'):
        if line.startswith(' '):
            continue
        pieces = line.split()
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -1,5 +1,5 @@
 import numpy
-import codecs
+import io
 import json
 import ujson
 import random
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@@ -1,4 +1,4 @@
-import codecs
+import io

 from libc.string cimport memcpy
 from murmurhash.mrmr cimport hash64
@@ -112,11 +112,11 @@ cdef class StringStore:
            string = &self.strings[i]
            py_string = string.chars[:string.length]
            strings.append(py_string.decode('utf8'))
-        with codecs.open(loc, 'w', 'utf8') as file_:
+        with io.open(loc, 'w', encoding='utf8') as file_:
            file_.write(SEPARATOR.join(strings))

    def load(self, loc):
-        with codecs.open(loc, 'r', 'utf8') as file_:
+        with io.open(loc, 'r', encoding='utf8') as file_:
            strings = file_.read().split(SEPARATOR)
        cdef unicode string
        cdef bytes byte_string
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -4,7 +4,7 @@ from libc.stdint cimport int32_t

 import bz2
 from os import path
-import codecs
+import io
 import math

 from .lexeme cimport EMPTY_LEXEME
--- a/tests/test_parse_navigate.py
+++ b/tests/test_parse_navigate.py
@@ -1,6 +1,6 @@
 from __future__ import unicode_literals
 from os import path
-import codecs
+import io

 from spacy.en import English

@@ -9,7 +9,7 @@ import pytest

 @pytest.fixture
 def sun_text():
-    with codecs.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', 'utf8') as file_:
+    with io.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', encoding='utf8') as file_:
        text = file_.read()
    return text