mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
* Use io module instead of deprecated codecs module
This commit is contained in:
commit
83dccf0fd7
|
@ -27,8 +27,8 @@ from pathlib import Path
|
||||||
|
|
||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
from shutil import copytree
|
from shutil import copytree
|
||||||
import codecs
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
import io
|
||||||
|
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
from spacy.vocab import write_binary_vectors
|
from spacy.vocab import write_binary_vectors
|
||||||
|
@ -61,7 +61,7 @@ def _read_clusters(loc):
|
||||||
print("Warning: Clusters file not found")
|
print("Warning: Clusters file not found")
|
||||||
return {}
|
return {}
|
||||||
clusters = {}
|
clusters = {}
|
||||||
for line in codecs.open(str(loc), 'r', 'utf8'):
|
for line in io.open(str(loc), 'r', encoding='utf8'):
|
||||||
try:
|
try:
|
||||||
cluster, word, freq = line.split()
|
cluster, word, freq = line.split()
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
@ -88,7 +88,7 @@ def _read_probs(loc):
|
||||||
print("Probabilities file not found. Trying freqs.")
|
print("Probabilities file not found. Trying freqs.")
|
||||||
return {}, 0.0
|
return {}, 0.0
|
||||||
probs = {}
|
probs = {}
|
||||||
for i, line in enumerate(codecs.open(str(loc), 'r', 'utf8')):
|
for i, line in enumerate(io.open(str(loc), 'r', encoding='utf8')):
|
||||||
prob, word = line.split()
|
prob, word = line.split()
|
||||||
prob = float(prob)
|
prob = float(prob)
|
||||||
probs[word] = prob
|
probs[word] = prob
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
import codecs
|
import io
|
||||||
import plac
|
import plac
|
||||||
|
|
||||||
from spacy.en import English
|
from spacy.en import English
|
||||||
|
|
||||||
|
|
||||||
def main(text_loc):
|
def main(text_loc):
|
||||||
with codecs.open(text_loc, 'r', 'utf8') as file_:
|
with io.open(text_loc, 'r', encoding='utf8') as file_:
|
||||||
text = file_.read()
|
text = file_.read()
|
||||||
NLU = English()
|
NLU = English()
|
||||||
for paragraph in text.split('\n\n'):
|
for paragraph in text.split('\n\n'):
|
||||||
|
|
|
@ -6,7 +6,7 @@ from __future__ import print_function
|
||||||
import os
|
import os
|
||||||
from os import path
|
from os import path
|
||||||
import shutil
|
import shutil
|
||||||
import codecs
|
import io
|
||||||
import random
|
import random
|
||||||
|
|
||||||
import plac
|
import plac
|
||||||
|
@ -177,7 +177,7 @@ def write_parses(Language, dev_loc, model_dir, out_loc):
|
||||||
nlp = Language(data_dir=model_dir)
|
nlp = Language(data_dir=model_dir)
|
||||||
gold_tuples = read_json_file(dev_loc)
|
gold_tuples = read_json_file(dev_loc)
|
||||||
scorer = Scorer()
|
scorer = Scorer()
|
||||||
out_file = codecs.open(out_loc, 'w', 'utf8')
|
out_file = io.open(out_loc, 'w', 'utf8')
|
||||||
for raw_text, sents in gold_tuples:
|
for raw_text, sents in gold_tuples:
|
||||||
sents = _merge_sents(sents)
|
sents = _merge_sents(sents)
|
||||||
for annot_tuples, brackets in sents:
|
for annot_tuples, brackets in sents:
|
||||||
|
|
|
@ -27,7 +27,7 @@ import json
|
||||||
from os import path
|
from os import path
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import codecs
|
import io
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
from spacy.munge import read_ptb
|
from spacy.munge import read_ptb
|
||||||
|
@ -122,7 +122,7 @@ def read_file(*pieces):
|
||||||
if not path.exists(loc):
|
if not path.exists(loc):
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
return codecs.open(loc, 'r', 'utf8').read().strip()
|
return io.open(loc, 'r', encoding='utf8').read().strip()
|
||||||
|
|
||||||
|
|
||||||
def get_file_names(section_dir, subsection):
|
def get_file_names(section_dir, subsection):
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
import numpy
|
import numpy
|
||||||
import codecs
|
import io
|
||||||
|
import json
|
||||||
|
import ujson
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
|
|
|
@ -1,5 +1,9 @@
|
||||||
|
<<<<<<< HEAD
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import codecs
|
import codecs
|
||||||
|
=======
|
||||||
|
import io
|
||||||
|
>>>>>>> 8caedba42a5255b9996533a732e17eee3f20a2dd
|
||||||
|
|
||||||
from libc.string cimport memcpy
|
from libc.string cimport memcpy
|
||||||
from murmurhash.mrmr cimport hash64
|
from murmurhash.mrmr cimport hash64
|
||||||
|
@ -129,6 +133,7 @@ cdef class StringStore:
|
||||||
|
|
||||||
def dump(self, loc):
|
def dump(self, loc):
|
||||||
cdef Utf8Str* string
|
cdef Utf8Str* string
|
||||||
|
<<<<<<< HEAD
|
||||||
cdef unicode py_string
|
cdef unicode py_string
|
||||||
cdef int i
|
cdef int i
|
||||||
with codecs.open(loc, 'w', 'utf8') as file_:
|
with codecs.open(loc, 'w', 'utf8') as file_:
|
||||||
|
@ -138,9 +143,18 @@ cdef class StringStore:
|
||||||
file_.write(py_string)
|
file_.write(py_string)
|
||||||
if (i+1) != self.size:
|
if (i+1) != self.size:
|
||||||
file_.write(SEPARATOR)
|
file_.write(SEPARATOR)
|
||||||
|
=======
|
||||||
|
cdef bytes py_string
|
||||||
|
for i in range(self.size):
|
||||||
|
string = &self.strings[i]
|
||||||
|
py_string = string.chars[:string.length]
|
||||||
|
strings.append(py_string.decode('utf8'))
|
||||||
|
with io.open(loc, 'w', encoding='utf8') as file_:
|
||||||
|
file_.write(SEPARATOR.join(strings))
|
||||||
|
>>>>>>> 8caedba42a5255b9996533a732e17eee3f20a2dd
|
||||||
|
|
||||||
def load(self, loc):
|
def load(self, loc):
|
||||||
with codecs.open(loc, 'r', 'utf8') as file_:
|
with io.open(loc, 'r', encoding='utf8') as file_:
|
||||||
strings = file_.read().split(SEPARATOR)
|
strings = file_.read().split(SEPARATOR)
|
||||||
if strings == ['']:
|
if strings == ['']:
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
from os import path
|
from os import path
|
||||||
import codecs
|
import io
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
|
from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
|
||||||
|
@ -8,7 +8,7 @@ DATA_DIR = path.join(path.dirname(__file__), '..', 'data')
|
||||||
|
|
||||||
|
|
||||||
def utf8open(loc, mode='r'):
|
def utf8open(loc, mode='r'):
|
||||||
return codecs.open(loc, mode, 'utf8')
|
return io.open(loc, mode, encoding='utf8')
|
||||||
|
|
||||||
|
|
||||||
def read_lang_data(data_dir):
|
def read_lang_data(data_dir):
|
||||||
|
|
|
@ -7,7 +7,7 @@ from libc.stdint cimport uint64_t
|
||||||
|
|
||||||
import bz2
|
import bz2
|
||||||
from os import path
|
from os import path
|
||||||
import codecs
|
import io
|
||||||
import math
|
import math
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
|
@ -1,13 +1,17 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
from os import path
|
from os import path
|
||||||
import codecs
|
import io
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sun_text():
|
def sun_text():
|
||||||
|
<<<<<<< HEAD:tests/parser/test_parse_navigate.py
|
||||||
with codecs.open(path.join(path.dirname(__file__), '..', 'sun.txt'), 'r', 'utf8') as file_:
|
with codecs.open(path.join(path.dirname(__file__), '..', 'sun.txt'), 'r', 'utf8') as file_:
|
||||||
|
=======
|
||||||
|
with io.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', encoding='utf8') as file_:
|
||||||
|
>>>>>>> 8caedba42a5255b9996533a732e17eee3f20a2dd:tests/test_parse_navigate.py
|
||||||
text = file_.read()
|
text = file_.read()
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user