Also lower-case in orth variants

This commit is contained in:
Matthew Honnibal 2019-09-18 21:54:51 +02:00
parent 19d99fc9e7
commit 42df49133d

View File

@@ -272,12 +272,17 @@ class GoldCorpus(object):
def make_orth_variants(nlp, raw, paragraph_tuples, orth_variant_level=0.0):
if random.random() >= orth_variant_level:
return raw, paragraph_tuples
if random.random() >= 0.5:
lower = True
raw = raw.lower()
ndsv = nlp.Defaults.single_orth_variants
ndpv = nlp.Defaults.paired_orth_variants
# modify words in paragraph_tuples
variant_paragraph_tuples = []
for sent_tuples, brackets in paragraph_tuples:
ids, words, tags, heads, labels, ner = sent_tuples
if lower:
words = [w.lower() for w in words]
# single variants
punct_choices = [random.choice(x["variants"]) for x in ndsv]
for word_idx in range(len(words)):