code for is_currency

This commit is contained in:
4altinok 2018-02-11 18:51:32 +01:00
parent 3deef1497a
commit 94fb0b75e3
2 changed files with 10 additions and 1 deletion

View File

@@ -21,7 +21,7 @@ IDS = {
"IS_QUOTE": IS_QUOTE, "IS_QUOTE": IS_QUOTE,
"IS_LEFT_PUNCT": IS_LEFT_PUNCT, "IS_LEFT_PUNCT": IS_LEFT_PUNCT,
"IS_RIGHT_PUNCT": IS_RIGHT_PUNCT, "IS_RIGHT_PUNCT": IS_RIGHT_PUNCT,
"FLAG18": FLAG18, "IS_CURRENCY": IS_CURRENCY,
"FLAG19": FLAG19, "FLAG19": FLAG19,
"FLAG20": FLAG20, "FLAG20": FLAG20,
"FLAG21": FLAG21, "FLAG21": FLAG21,

View File

@@ -69,6 +69,14 @@ def is_right_punct(text):
return text in right_punct return text in right_punct
def is_currency(text):
    """Return whether `text` consists solely of Unicode currency symbols.

    A character counts as a currency symbol when its Unicode general
    category is 'Sc' (Symbol, currency), e.g. '$', '€' or '£'.

    text: The string to check.
    Returns True if `text` is non-empty and every character is in
    category 'Sc', otherwise False.
    """
    # can be overwritten by lang with list of currency words, e.g. dollar, euro
    if not text:
        # An all-characters check is vacuously true for an empty string,
        # but an empty string is not a currency symbol.
        return False
    return all(unicodedata.category(char) == 'Sc' for char in text)
def like_email(text): def like_email(text):
return bool(_like_email(text)) return bool(_like_email(text))
@@ -164,5 +172,6 @@ LEX_ATTRS = {
attrs.IS_QUOTE: is_quote, attrs.IS_QUOTE: is_quote,
attrs.IS_LEFT_PUNCT: is_left_punct, attrs.IS_LEFT_PUNCT: is_left_punct,
attrs.IS_RIGHT_PUNCT: is_right_punct, attrs.IS_RIGHT_PUNCT: is_right_punct,
attrs.IS_CURRENCY: is_currency,
attrs.LIKE_URL: like_url attrs.LIKE_URL: like_url
} }