From a779275a594011f1fd1e49825d79d773273f3bdf Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 30 Aug 2014 20:57:43 +0200
Subject: [PATCH] * Add canon_case function

---
 spacy/orth.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/spacy/orth.py b/spacy/orth.py
index 847d7eae7..d9b4abecc 100644
--- a/spacy/orth.py
+++ b/spacy/orth.py
@@ -41,7 +41,16 @@ def can_tag(name, thresh):
 
 # String features
 def canon_case(string, prob, cluster, case_stats, tag_stats):
-    return string
+    upper_pc = case_stats['upper']
+    title_pc = case_stats['title']
+    lower_pc = case_stats['lower']
+
+    if upper_pc >= lower_pc and upper_pc >= title_pc:
+        return string.upper()
+    elif title_pc >= lower_pc:
+        return string.title()
+    else:
+        return string.lower()
 
 
 def word_shape(string, *args):