mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Fix noise addition
This commit is contained in:
parent
32842a3cd4
commit
c94fc9edb9
|
@ -356,22 +356,18 @@ def add_noise(orig, noise_level):
|
||||||
if random.random() >= noise_level:
|
if random.random() >= noise_level:
|
||||||
return orig
|
return orig
|
||||||
elif type(orig) == list:
|
elif type(orig) == list:
|
||||||
corrupted = [_corrupt(word, noise_level, replace_space=False) for word in orig]
|
corrupted = [_corrupt(word, noise_level) for word in orig]
|
||||||
corrupted = [w for w in corrupted if w]
|
corrupted = [w for w in corrupted if w]
|
||||||
return corrupted
|
return corrupted
|
||||||
else:
|
else:
|
||||||
return "".join(_corrupt(c, noise_level, replace_space=False) for c in orig)
|
return "".join(_corrupt(c, noise_level) for c in orig)
|
||||||
|
|
||||||
|
|
||||||
def _corrupt(c, noise_level, replace_space=False):
|
def _corrupt(c, noise_level):
|
||||||
if random.random() >= noise_level:
|
if random.random() >= noise_level:
|
||||||
return c
|
return c
|
||||||
elif replace_space and c == " ":
|
|
||||||
return "\n"
|
|
||||||
elif replace_space and c == "\n":
|
|
||||||
return " "
|
|
||||||
elif c in [".", "'", "!", "?", ","]:
|
elif c in [".", "'", "!", "?", ","]:
|
||||||
return ""
|
return "\n"
|
||||||
else:
|
else:
|
||||||
return c.lower()
|
return c.lower()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user