Remove debug data normalization for span analysis (#13203)

* Remove debug data normalization for span analysis

As a result of this normalization, `debug data` could show users tokens
that do not exist in their data (for example, `"` where the text actually
contains `''`).

* Update spacy/cli/debug_data.py

---------

Co-authored-by: svlandeg <svlandeg@github.com>
This commit is contained in:
Adriane Boyd 2024-02-06 14:14:55 +01:00 committed by GitHub
parent 1052cba9f3
commit afb22ad491
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1073,8 +1073,7 @@ def _get_distribution(docs, normalize: bool = True) -> Counter:
word_counts: Counter = Counter()
for doc in docs:
for token in doc:
# Normalize the text
t = token.text.lower().replace("``", '"').replace("''", '"')
t = token.text.lower()
word_counts[t] += 1
if normalize:
total = sum(word_counts.values(), 0.0)