Merge branch 'alvations-master'

This commit is contained in:
Matthew Honnibal 2015-10-10 14:13:24 +11:00
commit 6ea8f99a10
9 changed files with 35 additions and 15 deletions

View File

@ -27,8 +27,8 @@ from pathlib import Path
from shutil import copyfile
from shutil import copytree
import codecs
from collections import defaultdict
import io
from spacy.vocab import Vocab
from spacy.vocab import write_binary_vectors
@ -61,7 +61,7 @@ def _read_clusters(loc):
print("Warning: Clusters file not found")
return {}
clusters = {}
for line in codecs.open(str(loc), 'r', 'utf8'):
for line in io.open(str(loc), 'r', encoding='utf8'):
try:
cluster, word, freq = line.split()
except ValueError:
@ -88,7 +88,7 @@ def _read_probs(loc):
print("Probabilities file not found. Trying freqs.")
return {}, 0.0
probs = {}
for i, line in enumerate(codecs.open(str(loc), 'r', 'utf8')):
for i, line in enumerate(io.open(str(loc), 'r', encoding='utf8')):
prob, word = line.split()
prob = float(prob)
probs[word] = prob

View File

@ -1,11 +1,11 @@
import codecs
import io
import plac
from spacy.en import English
def main(text_loc):
with codecs.open(text_loc, 'r', 'utf8') as file_:
with io.open(text_loc, 'r', encoding='utf8') as file_:
text = file_.read()
NLU = English()
for paragraph in text.split('\n\n'):

View File

@ -6,7 +6,7 @@ from __future__ import print_function
import os
from os import path
import shutil
import codecs
import io
import random
import plac
@ -177,7 +177,7 @@ def write_parses(Language, dev_loc, model_dir, out_loc):
nlp = Language(data_dir=model_dir)
gold_tuples = read_json_file(dev_loc)
scorer = Scorer()
out_file = codecs.open(out_loc, 'w', 'utf8')
out_file = io.open(out_loc, 'w', encoding='utf8')
for raw_text, sents in gold_tuples:
sents = _merge_sents(sents)
for annot_tuples, brackets in sents:

View File

@ -27,7 +27,7 @@ import json
from os import path
import os
import re
import codecs
import io
from collections import defaultdict
from spacy.munge import read_ptb
@ -122,7 +122,7 @@ def read_file(*pieces):
if not path.exists(loc):
return None
else:
return codecs.open(loc, 'r', 'utf8').read().strip()
return io.open(loc, 'r', encoding='utf8').read().strip()
def get_file_names(section_dir, subsection):

View File

@ -1,5 +1,7 @@
import numpy
import codecs
import io
import json
import ujson
import random
import re
import os

View File

@ -1,5 +1,9 @@
<<<<<<< HEAD
from __future__ import unicode_literals
import codecs
=======
import io
>>>>>>> 8caedba42a5255b9996533a732e17eee3f20a2dd
from libc.string cimport memcpy
from murmurhash.mrmr cimport hash64
@ -129,6 +133,7 @@ cdef class StringStore:
def dump(self, loc):
cdef Utf8Str* string
<<<<<<< HEAD
cdef unicode py_string
cdef int i
with codecs.open(loc, 'w', 'utf8') as file_:
@ -138,9 +143,18 @@ cdef class StringStore:
file_.write(py_string)
if (i+1) != self.size:
file_.write(SEPARATOR)
=======
cdef bytes py_string
for i in range(self.size):
string = &self.strings[i]
py_string = string.chars[:string.length]
strings.append(py_string.decode('utf8'))
with io.open(loc, 'w', encoding='utf8') as file_:
file_.write(SEPARATOR.join(strings))
>>>>>>> 8caedba42a5255b9996533a732e17eee3f20a2dd
def load(self, loc):
with codecs.open(loc, 'r', 'utf8') as file_:
with io.open(loc, 'r', encoding='utf8') as file_:
strings = file_.read().split(SEPARATOR)
if strings == ['']:
return None

View File

@ -1,5 +1,5 @@
from os import path
import codecs
import io
import json
import re
from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
@ -8,7 +8,7 @@ DATA_DIR = path.join(path.dirname(__file__), '..', 'data')
def utf8open(loc, mode='r'):
return codecs.open(loc, mode, 'utf8')
return io.open(loc, mode, encoding='utf8')
def read_lang_data(data_dir):

View File

@ -7,7 +7,7 @@ from libc.stdint cimport uint64_t
import bz2
from os import path
import codecs
import io
import math
import json

View File

@ -1,13 +1,17 @@
from __future__ import unicode_literals
from os import path
import codecs
import io
import pytest
@pytest.fixture
def sun_text():
<<<<<<< HEAD:tests/parser/test_parse_navigate.py
with codecs.open(path.join(path.dirname(__file__), '..', 'sun.txt'), 'r', 'utf8') as file_:
=======
with io.open(path.join(path.dirname(__file__), 'sun.txt'), 'r', encoding='utf8') as file_:
>>>>>>> 8caedba42a5255b9996533a732e17eee3f20a2dd:tests/test_parse_navigate.py
text = file_.read()
return text