mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
code for is_currency
This commit is contained in:
parent
3deef1497a
commit
94fb0b75e3
|
@ -21,7 +21,7 @@ IDS = {
|
|||
"IS_QUOTE": IS_QUOTE,
|
||||
"IS_LEFT_PUNCT": IS_LEFT_PUNCT,
|
||||
"IS_RIGHT_PUNCT": IS_RIGHT_PUNCT,
|
||||
"FLAG18": FLAG18,
|
||||
"IS_CURRENCY": IS_CURRENCY,
|
||||
"FLAG19": FLAG19,
|
||||
"FLAG20": FLAG20,
|
||||
"FLAG21": FLAG21,
|
||||
|
|
|
@ -69,6 +69,14 @@ def is_right_punct(text):
|
|||
return text in right_punct
|
||||
|
||||
|
||||
def is_currency(text):
    """Return True if `text` is made up only of currency symbols.

    A character counts as a currency symbol when its Unicode general
    category is 'Sc' (Symbol, currency), e.g. "$", "€" or "£".

    Args:
        text: The string to check.

    Returns:
        True if `text` is non-empty and every character has category
        'Sc'; False otherwise (including for the empty string).
    """
    # can be overwritten by lang with list of currency words, e.g. dollar, euro
    if not text:
        # An empty string has no currency symbol; without this guard the
        # all() below would be vacuously True.
        return False
    return all(unicodedata.category(char) == 'Sc' for char in text)
|
||||
|
||||
|
||||
def like_email(text):
    """Return whether `text` looks like an email address.

    Delegates to the module-level `_like_email` callable and coerces its
    result to a bool.
    """
    # NOTE(review): `_like_email` is defined outside this view; presumably
    # a compiled-regex matcher returning a match object or None -- confirm.
    matched = _like_email(text)
    return bool(matched)
|
||||
|
||||
|
@ -164,5 +172,6 @@ LEX_ATTRS = {
|
|||
attrs.IS_QUOTE: is_quote,
|
||||
attrs.IS_LEFT_PUNCT: is_left_punct,
|
||||
attrs.IS_RIGHT_PUNCT: is_right_punct,
|
||||
attrs.IS_CURRENCY: is_currency,
|
||||
attrs.LIKE_URL: like_url
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user