From afb22ad491cfb2486393cf319bb182b573d6e35c Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Tue, 6 Feb 2024 14:14:55 +0100
Subject: [PATCH] Remove debug data normalization for span analysis (#13203)

* Remove debug data normalization for span analysis

As a result of this normalization, `debug data` could show users tokens
that do not exist in their data (for example, `"` in place of a token
written as two backticks or two single quotes).

* Update spacy/cli/debug_data.py

---------

Co-authored-by: svlandeg <svlandeg@github.com>
---
 spacy/cli/debug_data.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index 714969be1..7a98e6d56 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -1073,8 +1073,7 @@ def _get_distribution(docs, normalize: bool = True) -> Counter:
     word_counts: Counter = Counter()
     for doc in docs:
         for token in doc:
-            # Normalize the text
-            t = token.text.lower().replace("``", '"').replace("''", '"')
+            t = token.text.lower()
             word_counts[t] += 1
     if normalize:
         total = sum(word_counts.values(), 0.0)