From fd00de42aaad5d98d7cd868ef0001f70cdf8345b Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Wed, 20 Dec 2023 17:39:45 +0100 Subject: [PATCH] Update spacy/cli/debug_data.py --- spacy/cli/debug_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index e2839ed03..7a98e6d56 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -1073,7 +1073,8 @@ def _get_distribution(docs, normalize: bool = True) -> Counter: word_counts: Counter = Counter() for doc in docs: for token in doc: - word_counts[token.text] += 1 + t = token.text.lower() + word_counts[t] += 1 if normalize: total = sum(word_counts.values(), 0.0) word_counts = Counter({k: v / total for k, v in word_counts.items()})