mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 09:56:28 +03:00
* Add function to read detokenization rules
This commit is contained in:
parent
077885637d
commit
43d5964e13
|
@ -70,6 +70,15 @@ def read_tokenization(lang):
|
|||
return entries
|
||||
|
||||
|
||||
def read_detoken_rules(lang):
    """Load the detokenization rules for a language.

    Opens the file named 'detokenize' inside the ``lang`` sub-directory of
    DATA_DIR and returns its contents as a list of strings, one per line in
    file order, each with leading and trailing whitespace stripped. Lines
    that are blank after stripping are kept as empty strings.
    """
    rules_path = path.join(DATA_DIR, lang, 'detokenize')
    with utf8open(rules_path) as rules_file:
        return [raw_line.strip() for raw_line in rules_file]
|
||||
|
||||
|
||||
def align_tokens(ref, indices):
|
||||
start = 0
|
||||
queue = list(indices)
|
||||
|
|
Loading…
Reference in New Issue
Block a user