Mirror of
https://github.com/explosion/spaCy.git
(synced 2025-07-11 00:32:40 +03:00)
Remove debug data normalization for span analysis
As a result of this normalization, `debug data` could show a user tokens that do not exist in their data.
This commit is contained in:
parent
1b2d66f98e
commit
382c296f7d
|
@ -1073,9 +1073,7 @@ def _get_distribution(docs, normalize: bool = True) -> Counter:
|
||||||
word_counts: Counter = Counter()
|
word_counts: Counter = Counter()
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
for token in doc:
|
for token in doc:
|
||||||
# Normalize the text
|
word_counts[token.text] += 1
|
||||||
t = token.text.lower().replace("``", '"').replace("''", '"')
|
|
||||||
word_counts[t] += 1
|
|
||||||
if normalize:
|
if normalize:
|
||||||
total = sum(word_counts.values(), 0.0)
|
total = sum(word_counts.values(), 0.0)
|
||||||
word_counts = Counter({k: v / total for k, v in word_counts.items()})
|
word_counts = Counter({k: v / total for k, v in word_counts.items()})
|
||||||
|
|
Loading…
Reference in New Issue
Block a user