From de6b5ed14dcb036c02e92664365ea2b1fb6cf21c Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Tue, 27 Apr 2021 16:16:35 +0900 Subject: [PATCH] Fix percent unk display in debug data (#7886) * Fix percent unk display This was showing (ratio %), so 10% would show as 0.10%. Fix by multiplying ratio by 100. Might want to add a warning if this is over a threshold. * Only show whole-integer percents --- spacy/cli/debug_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index 3351e53fe..1ebf65957 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -173,8 +173,8 @@ def debug_data( ) n_missing_vectors = sum(gold_train_data["words_missing_vectors"].values()) msg.warn( - "{} words in training data without vectors ({:0.2f}%)".format( - n_missing_vectors, n_missing_vectors / gold_train_data["n_words"] + "{} words in training data without vectors ({:.0f}%)".format( + n_missing_vectors, 100 * (n_missing_vectors / gold_train_data["n_words"]) ), ) msg.text(