From 42df49133d5a2152e7d0eb02cd7884c4579660be Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 18 Sep 2019 21:54:51 +0200
Subject: [PATCH] Also lower-case in orth variants

---
 spacy/gold.pyx | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index 4eaea80ed..b684c470c 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -272,12 +272,17 @@ class GoldCorpus(object):
 def make_orth_variants(nlp, raw, paragraph_tuples, orth_variant_level=0.0):
     if random.random() >= orth_variant_level:
         return raw, paragraph_tuples
+    if random.random() >= 0.5:
+        lower = True
+        raw = raw.lower()
     ndsv = nlp.Defaults.single_orth_variants
     ndpv = nlp.Defaults.paired_orth_variants
     # modify words in paragraph_tuples
     variant_paragraph_tuples = []
     for sent_tuples, brackets in paragraph_tuples:
         ids, words, tags, heads, labels, ner = sent_tuples
+        if lower:
+            words = [w.lower() for w in words]
         # single variants
         punct_choices = [random.choice(x["variants"]) for x in ndsv]
         for word_idx in range(len(words)):