* Use io module instead of deprecated codecs module

2025-11-04 09:57:26 +03:00 · 2015-10-10 14:13:01 +11:00 · 2015-10-10 14:13:01 +11:00 · 83dccf0fd7
commit 83dccf0fd7
parent 55cd7008bb 8caedba42a
9 changed files with 35 additions and 15 deletions
--- a/bin/init_model.py
+++ b/bin/init_model.py
@ -27,8 +27,8 @@ from pathlib import Path

 from shutil import copyfile
 from shutil import copytree
-import codecs
 from collections import defaultdict
+import io

 from spacy.vocab import Vocab
 from spacy.vocab import write_binary_vectors
@ -61,7 +61,7 @@ def _read_clusters(loc):
        print("Warning: Clusters file not found")
        return {}
    clusters = {}
-    for line in codecs.open(str(loc), 'r', 'utf8'):
+    for line in io.open(str(loc), 'r', encoding='utf8'):
        try:
            cluster, word, freq = line.split()
        except ValueError:
@ -88,7 +88,7 @@ def _read_probs(loc):
        print("Probabilities file not found. Trying freqs.")
        return {}, 0.0
    probs = {}
-    for i, line in enumerate(codecs.open(str(loc), 'r', 'utf8')):
+    for i, line in enumerate(io.open(str(loc), 'r', encoding='utf8')):
        prob, word = line.split()
        prob = float(prob)
        probs[word] = prob
--- a/bin/ner_tag.py
+++ b/bin/ner_tag.py
@ -1,11 +1,11 @@
-import codecs
+import io
 import plac

 from spacy.en import English


 def main(text_loc):
-    with codecs.open(text_loc, 'r', 'utf8') as file_:
+    with io.open(text_loc, 'r', encoding='utf8') as file_:
        text = file_.read()
    NLU = English()
    for paragraph in text.split('\n\n'):
--- a/bin/parser/train.py
+++ b/bin/parser/train.py
@ -6,7 +6,7 @@ from __future__ import print_function
 import os
 from os import path
 import shutil
-import codecs
+import io
 import random

 import plac
@ -177,7 +177,7 @@ def write_parses(Language, dev_loc, model_dir, out_loc):
    nlp = Language(data_dir=model_dir)
    gold_tuples = read_json_file(dev_loc)
    scorer = Scorer()
-    out_file = codecs.open(out_loc, 'w', 'utf8')
+    out_file = io.open(out_loc, 'w', 'utf8')
    for raw_text, sents in gold_tuples:
        sents = _merge_sents(sents)
        for annot_tuples, brackets in sents:
--- a/bin/prepare_treebank.py
+++ b/bin/prepare_treebank.py
@ -27,7 +27,7 @@ import json
 from os import path
 import os
 import re
-import codecs
+import io
 from collections import defaultdict

 from spacy.munge import read_ptb
@ -122,7 +122,7 @@ def read_file(*pieces):
    if not path.exists(loc):
        return None
    else:
-        return codecs.open(loc, 'r', 'utf8').read().strip()
+        return io.open(loc, 'r', encoding='utf8').read().strip()


 def get_file_names(section_dir, subsection):
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@ -1,5 +1,7 @@
 import numpy
-import codecs
+import io
+import json
+import ujson
 import random
 import re
 import os
--- a/spacy/strings.pyx
+++ b/spacy/strings.pyx
@ -1,5 +1,9 @@
+<<<<<<< HEAD
 from __future__ import unicode_literals
 import codecs
+=======
+import io
+>>>>>>> 8caedba42a5255b9996533a732e17eee3f20a2dd

 from libc.string cimport memcpy
 from murmurhash.mrmr cimport hash64
@ -129,6 +133,7 @@ cdef class StringStore:

    def dump(self, loc):
        cdef Utf8Str* string
+<<<<<<< HEAD
        cdef unicode py_string
        cdef int i
        with codecs.open(loc, 'w', 'utf8') as file_:
@ -138,9 +143,18 @@ cdef class StringStore:
                file_.write(py_string)
                if (i+1) != self.size:
                    file_.write(SEPARATOR)
+=======
+        cdef bytes py_string
+        for i in range(self.size):
+            string = &self.strings[i]
+            py_string = string.chars[:string.length]
+            strings.append(py_string.decode('utf8'))
+        with io.open(loc, 'w', encoding='utf8') as file_:
+            file_.write(SEPARATOR.join(strings))
+>>>>>>> 8caedba42a5255b9996533a732e17eee3f20a2dd

    def load(self, loc):
-        with codecs.open(loc, 'r', 'utf8') as file_:
+        with io.open(loc, 'r', encoding='utf8') as file_:
            strings = file_.read().split(SEPARATOR)
        if strings == ['']:
            return None
--- a/spacy/util.py
+++ b/spacy/util.py
@ -1,5 +1,5 @@
 from os import path
-import codecs
+import io
 import json
 import re
 from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
@ -8,7 +8,7 @@ DATA_DIR = path.join(path.dirname(__file__), '..', 'data')


 def utf8open(loc, mode='r'):
-    return codecs.open(loc, mode, 'utf8')
+    return io.open(loc, mode, encoding='utf8')


 def read_lang_data(data_dir):
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@ -7,7 +7,7 @@ from libc.stdint cimport uint64_t

 import bz2
 from os import path
-import codecs
+import io
 import math
 import json

--- a/tests/parser/test_parse_navigate.py
+++ b/tests/parser/test_parse_navigate.py
@ -1,13 +1,17 @@
 from __future__ import unicode_literals
 from os import path
-import codecs
+import io

 import pytest


@pytest.fixture
 def sun_text():
+<<<<<<< HEAD:tests/parser/test_parse_navigate.py
    with codecs.open(path.join(path.dirname(__file__), '..', 'sun.txt'), 'r', 'utf8') as file_:
+=======
+    with io.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', encoding='utf8') as file_:
+>>>>>>> 8caedba42a5255b9996533a732e17eee3f20a2dd:tests/test_parse_navigate.py
        text = file_.read()
    return text