Remove debug data normalization for span analysis

As a result of this normalization, `debug data` could show a user tokens
that do not exist in their data.
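
For illustration, a minimal sketch of the removed normalization applied to a hypothetical token list (the sample tokens below are not from the commit):

from collections import Counter

# Old behavior: lowercase and rewrite LaTeX-style quotes before counting.
tokens = ["The", "``", "debug", "''"]
word_counts: Counter = Counter()
for t in tokens:
    word_counts[t.lower().replace("``", '"').replace("''", '"')] += 1
print(word_counts)  # Counter({'"': 2, 'the': 1, 'debug': 1})
# '"' and 'the' are reported even though neither token appears verbatim
# in the input, which is the behavior this commit removes.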
Adriane Boyd 2023-12-19 08:56:18 +01:00
parent 1b2d66f98e
commit 382c296f7d


@@ -1073,9 +1073,7 @@ def _get_distribution(docs, normalize: bool = True) -> Counter:
     word_counts: Counter = Counter()
     for doc in docs:
         for token in doc:
-            # Normalize the text
-            t = token.text.lower().replace("``", '"').replace("''", '"')
-            word_counts[t] += 1
+            word_counts[token.text] += 1
     if normalize:
         total = sum(word_counts.values(), 0.0)
         word_counts = Counter({k: v / total for k, v in word_counts.items()})