mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Remove debug data normalization for span analysis (#13203)
* Remove debug data normalization for span analysis

  As a result of this normalization, `debug data` could show a user tokens that do not exist in their data.

* Update spacy/cli/debug_data.py

---------

Co-authored-by: svlandeg <svlandeg@github.com>
This commit is contained in:
parent
1052cba9f3
commit
afb22ad491
|
@ -1073,8 +1073,7 @@ def _get_distribution(docs, normalize: bool = True) -> Counter:
|
|||
word_counts: Counter = Counter()
|
||||
for doc in docs:
|
||||
for token in doc:
|
||||
# Normalize the text
|
||||
t = token.text.lower().replace("``", '"').replace("''", '"')
|
||||
t = token.text.lower()
|
||||
word_counts[t] += 1
|
||||
if normalize:
|
||||
total = sum(word_counts.values(), 0.0)
|
||||
|
|
Loading…
Reference in New Issue
Block a user