Fix noise addition

This commit is contained in:
Matthew Honnibal 2019-08-29 15:39:32 +02:00
parent 32842a3cd4
commit c94fc9edb9

View File

@@ -356,22 +356,18 @@ def add_noise(orig, noise_level):
if random.random() >= noise_level: if random.random() >= noise_level:
return orig return orig
elif type(orig) == list: elif type(orig) == list:
corrupted = [_corrupt(word, noise_level, replace_space=False) for word in orig] corrupted = [_corrupt(word, noise_level) for word in orig]
corrupted = [w for w in corrupted if w] corrupted = [w for w in corrupted if w]
return corrupted return corrupted
else: else:
return "".join(_corrupt(c, noise_level, replace_space=False) for c in orig) return "".join(_corrupt(c, noise_level) for c in orig)
def _corrupt(c, noise_level, replace_space=False): def _corrupt(c, noise_level):
if random.random() >= noise_level: if random.random() >= noise_level:
return c return c
elif replace_space and c == " ":
return "\n"
elif replace_space and c == "\n":
return " "
elif c in [".", "'", "!", "?", ","]: elif c in [".", "'", "!", "?", ","]:
return "" return "\n"
else: else:
return c.lower() return c.lower()