mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	* Use io module instead of deprecated codecs module
This commit is contained in:
		
						commit
						83dccf0fd7
					
				| 
						 | 
					@ -27,8 +27,8 @@ from pathlib import Path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from shutil import copyfile
 | 
					from shutil import copyfile
 | 
				
			||||||
from shutil import copytree
 | 
					from shutil import copytree
 | 
				
			||||||
import codecs
 | 
					 | 
				
			||||||
from collections import defaultdict
 | 
					from collections import defaultdict
 | 
				
			||||||
 | 
					import io
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from spacy.vocab import Vocab
 | 
					from spacy.vocab import Vocab
 | 
				
			||||||
from spacy.vocab import write_binary_vectors
 | 
					from spacy.vocab import write_binary_vectors
 | 
				
			||||||
| 
						 | 
					@ -61,7 +61,7 @@ def _read_clusters(loc):
 | 
				
			||||||
        print("Warning: Clusters file not found")
 | 
					        print("Warning: Clusters file not found")
 | 
				
			||||||
        return {}
 | 
					        return {}
 | 
				
			||||||
    clusters = {}
 | 
					    clusters = {}
 | 
				
			||||||
    for line in codecs.open(str(loc), 'r', 'utf8'):
 | 
					    for line in io.open(str(loc), 'r', encoding='utf8'):
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            cluster, word, freq = line.split()
 | 
					            cluster, word, freq = line.split()
 | 
				
			||||||
        except ValueError:
 | 
					        except ValueError:
 | 
				
			||||||
| 
						 | 
					@ -88,7 +88,7 @@ def _read_probs(loc):
 | 
				
			||||||
        print("Probabilities file not found. Trying freqs.")
 | 
					        print("Probabilities file not found. Trying freqs.")
 | 
				
			||||||
        return {}, 0.0
 | 
					        return {}, 0.0
 | 
				
			||||||
    probs = {}
 | 
					    probs = {}
 | 
				
			||||||
    for i, line in enumerate(codecs.open(str(loc), 'r', 'utf8')):
 | 
					    for i, line in enumerate(io.open(str(loc), 'r', encoding='utf8')):
 | 
				
			||||||
        prob, word = line.split()
 | 
					        prob, word = line.split()
 | 
				
			||||||
        prob = float(prob)
 | 
					        prob = float(prob)
 | 
				
			||||||
        probs[word] = prob
 | 
					        probs[word] = prob
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,11 +1,11 @@
 | 
				
			||||||
import codecs
 | 
					import io
 | 
				
			||||||
import plac
 | 
					import plac
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from spacy.en import English
 | 
					from spacy.en import English
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def main(text_loc):
 | 
					def main(text_loc):
 | 
				
			||||||
    with codecs.open(text_loc, 'r', 'utf8') as file_:
 | 
					    with io.open(text_loc, 'r', encoding='utf8') as file_:
 | 
				
			||||||
        text = file_.read()
 | 
					        text = file_.read()
 | 
				
			||||||
    NLU = English()
 | 
					    NLU = English()
 | 
				
			||||||
    for paragraph in text.split('\n\n'):
 | 
					    for paragraph in text.split('\n\n'):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6,7 +6,7 @@ from __future__ import print_function
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
from os import path
 | 
					from os import path
 | 
				
			||||||
import shutil
 | 
					import shutil
 | 
				
			||||||
import codecs
 | 
					import io
 | 
				
			||||||
import random
 | 
					import random
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import plac
 | 
					import plac
 | 
				
			||||||
| 
						 | 
					@ -177,7 +177,7 @@ def write_parses(Language, dev_loc, model_dir, out_loc):
 | 
				
			||||||
    nlp = Language(data_dir=model_dir)
 | 
					    nlp = Language(data_dir=model_dir)
 | 
				
			||||||
    gold_tuples = read_json_file(dev_loc)
 | 
					    gold_tuples = read_json_file(dev_loc)
 | 
				
			||||||
    scorer = Scorer()
 | 
					    scorer = Scorer()
 | 
				
			||||||
    out_file = codecs.open(out_loc, 'w', 'utf8')
 | 
					    out_file = io.open(out_loc, 'w', 'utf8')
 | 
				
			||||||
    for raw_text, sents in gold_tuples:
 | 
					    for raw_text, sents in gold_tuples:
 | 
				
			||||||
        sents = _merge_sents(sents)
 | 
					        sents = _merge_sents(sents)
 | 
				
			||||||
        for annot_tuples, brackets in sents:
 | 
					        for annot_tuples, brackets in sents:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -27,7 +27,7 @@ import json
 | 
				
			||||||
from os import path
 | 
					from os import path
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import codecs
 | 
					import io
 | 
				
			||||||
from collections import defaultdict
 | 
					from collections import defaultdict
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from spacy.munge import read_ptb
 | 
					from spacy.munge import read_ptb
 | 
				
			||||||
| 
						 | 
					@ -122,7 +122,7 @@ def read_file(*pieces):
 | 
				
			||||||
    if not path.exists(loc):
 | 
					    if not path.exists(loc):
 | 
				
			||||||
        return None
 | 
					        return None
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        return codecs.open(loc, 'r', 'utf8').read().strip()
 | 
					        return io.open(loc, 'r', encoding='utf8').read().strip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_file_names(section_dir, subsection):
 | 
					def get_file_names(section_dir, subsection):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,5 +1,7 @@
 | 
				
			||||||
import numpy
 | 
					import numpy
 | 
				
			||||||
import codecs
 | 
					import io
 | 
				
			||||||
 | 
					import json
 | 
				
			||||||
 | 
					import ujson
 | 
				
			||||||
import random
 | 
					import random
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,5 +1,9 @@
 | 
				
			||||||
 | 
					<<<<<<< HEAD
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
import codecs
 | 
					import codecs
 | 
				
			||||||
 | 
					=======
 | 
				
			||||||
 | 
					import io
 | 
				
			||||||
 | 
					>>>>>>> 8caedba42a5255b9996533a732e17eee3f20a2dd
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from libc.string cimport memcpy
 | 
					from libc.string cimport memcpy
 | 
				
			||||||
from murmurhash.mrmr cimport hash64
 | 
					from murmurhash.mrmr cimport hash64
 | 
				
			||||||
| 
						 | 
					@ -129,6 +133,7 @@ cdef class StringStore:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def dump(self, loc):
 | 
					    def dump(self, loc):
 | 
				
			||||||
        cdef Utf8Str* string
 | 
					        cdef Utf8Str* string
 | 
				
			||||||
 | 
					<<<<<<< HEAD
 | 
				
			||||||
        cdef unicode py_string
 | 
					        cdef unicode py_string
 | 
				
			||||||
        cdef int i
 | 
					        cdef int i
 | 
				
			||||||
        with codecs.open(loc, 'w', 'utf8') as file_:
 | 
					        with codecs.open(loc, 'w', 'utf8') as file_:
 | 
				
			||||||
| 
						 | 
					@ -138,9 +143,18 @@ cdef class StringStore:
 | 
				
			||||||
                file_.write(py_string)
 | 
					                file_.write(py_string)
 | 
				
			||||||
                if (i+1) != self.size:
 | 
					                if (i+1) != self.size:
 | 
				
			||||||
                    file_.write(SEPARATOR)
 | 
					                    file_.write(SEPARATOR)
 | 
				
			||||||
 | 
					=======
 | 
				
			||||||
 | 
					        cdef bytes py_string
 | 
				
			||||||
 | 
					        for i in range(self.size):
 | 
				
			||||||
 | 
					            string = &self.strings[i]
 | 
				
			||||||
 | 
					            py_string = string.chars[:string.length]
 | 
				
			||||||
 | 
					            strings.append(py_string.decode('utf8'))
 | 
				
			||||||
 | 
					        with io.open(loc, 'w', encoding='utf8') as file_:
 | 
				
			||||||
 | 
					            file_.write(SEPARATOR.join(strings))
 | 
				
			||||||
 | 
					>>>>>>> 8caedba42a5255b9996533a732e17eee3f20a2dd
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def load(self, loc):
 | 
					    def load(self, loc):
 | 
				
			||||||
        with codecs.open(loc, 'r', 'utf8') as file_:
 | 
					        with io.open(loc, 'r', encoding='utf8') as file_:
 | 
				
			||||||
            strings = file_.read().split(SEPARATOR)
 | 
					            strings = file_.read().split(SEPARATOR)
 | 
				
			||||||
        if strings == ['']:
 | 
					        if strings == ['']:
 | 
				
			||||||
            return None
 | 
					            return None
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,5 +1,5 @@
 | 
				
			||||||
from os import path
 | 
					from os import path
 | 
				
			||||||
import codecs
 | 
					import io
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
 | 
					from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
 | 
				
			||||||
| 
						 | 
					@ -8,7 +8,7 @@ DATA_DIR = path.join(path.dirname(__file__), '..', 'data')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def utf8open(loc, mode='r'):
 | 
					def utf8open(loc, mode='r'):
 | 
				
			||||||
    return codecs.open(loc, mode, 'utf8')
 | 
					    return io.open(loc, mode, encoding='utf8')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def read_lang_data(data_dir):
 | 
					def read_lang_data(data_dir):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -7,7 +7,7 @@ from libc.stdint cimport uint64_t
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import bz2
 | 
					import bz2
 | 
				
			||||||
from os import path
 | 
					from os import path
 | 
				
			||||||
import codecs
 | 
					import io
 | 
				
			||||||
import math
 | 
					import math
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,13 +1,17 @@
 | 
				
			||||||
from __future__ import unicode_literals
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
from os import path
 | 
					from os import path
 | 
				
			||||||
import codecs
 | 
					import io
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import pytest
 | 
					import pytest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@pytest.fixture
 | 
					@pytest.fixture
 | 
				
			||||||
def sun_text():
 | 
					def sun_text():
 | 
				
			||||||
 | 
					<<<<<<< HEAD:tests/parser/test_parse_navigate.py
 | 
				
			||||||
    with codecs.open(path.join(path.dirname(__file__), '..', 'sun.txt'), 'r', 'utf8') as file_:
 | 
					    with codecs.open(path.join(path.dirname(__file__), '..', 'sun.txt'), 'r', 'utf8') as file_:
 | 
				
			||||||
 | 
					=======
 | 
				
			||||||
 | 
					    with io.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', encoding='utf8') as file_:
 | 
				
			||||||
 | 
					>>>>>>> 8caedba42a5255b9996533a732e17eee3f20a2dd:tests/test_parse_navigate.py
 | 
				
			||||||
        text = file_.read()
 | 
					        text = file_.read()
 | 
				
			||||||
    return text
 | 
					    return text
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user