* Add function to read detokenization rules

Matthew Honnibal 2014-10-22 12:54:59 +11:00
parent 077885637d
commit 43d5964e13


@@ -70,6 +70,15 @@ def read_tokenization(lang):
    return entries

def read_detoken_rules(lang):
    loc = path.join(DATA_DIR, lang, 'detokenize')
    entries = []
    with utf8open(loc) as file_:
        for line in file_:
            entries.append(line.strip())
    return entries

def align_tokens(ref, indices):
    start = 0
    queue = list(indices)
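
For context, here is a minimal sketch (not part of the commit) of how the new helper might be exercised. It assumes DATA_DIR points at the package's language data directory and that the detokenize file holds one rule per line, as the loop in the diff suggests; the utf8open stand-in and the call site are hypothetical.

# Sketch only: re-creates the helper from the diff with assumed
# surrounding definitions (DATA_DIR, utf8open) so it runs standalone.
from os import path
import codecs

DATA_DIR = path.join(path.dirname(__file__), 'data')  # assumed layout


def utf8open(loc, mode='r'):
    # Stand-in for the module's utf8open helper referenced in the diff.
    return codecs.open(loc, mode, encoding='utf8')


def read_detoken_rules(lang):
    # Read one detokenization rule per line from DATA_DIR/<lang>/detokenize.
    loc = path.join(DATA_DIR, lang, 'detokenize')
    entries = []
    with utf8open(loc) as file_:
        for line in file_:
            entries.append(line.strip())
    return entries


if __name__ == '__main__':
    # Hypothetical call site: load the English detokenization rules.
    rules = read_detoken_rules('en')
    print(len(rules), 'detokenization rules loaded')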