Remove debug data normalization for span analysis (#13203)

* Remove debug data normalization for span analysis

  As a result of this normalization, `debug data` could show a user tokens that do not exist in their data.

* Update spacy/cli/debug_data.py

---------

Co-authored-by: svlandeg <svlandeg@github.com>
2026-01-10 02:31:16 +03:00 · 2024-02-06 14:14:55 +01:00 · 2024-02-06 14:14:55 +01:00 · afb22ad491
commit afb22ad491
parent 1052cba9f3
1 changed file with 1 addition and 2 deletions
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -1073,8 +1073,7 @@ def _get_distribution(docs, normalize: bool = True) -> Counter:
    word_counts: Counter = Counter()
    for doc in docs:
        for token in doc:
-            # Normalize the text
-            t = token.text.lower().replace("``", '"').replace("''", '"')
+            t = token.text.lower()
            word_counts[t] += 1
    if normalize:
        total = sum(word_counts.values(), 0.0)