mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 05:37:03 +03:00
Add all symbols in Unicode Currency Symbols block (#8212)
* Add all symbols in Unicode Currency Symbols block

  In #8102 it came up that the rupee symbol was treated differently from the dollar / euro / yen symbols. This adds many symbols that were not already included.

* Fix test

* Fix training test
This commit is contained in:
parent
fc37715cfb
commit
d1a221a374
|
@ -260,7 +260,10 @@ _units = (
|
||||||
"кг г мг м/с км/ч кПа Па мбар Кб КБ кб Мб МБ мб Гб ГБ гб Тб ТБ тб"
|
"кг г мг м/с км/ч кПа Па мбар Кб КБ кб Мб МБ мб Гб ГБ гб Тб ТБ тб"
|
||||||
"كم كم² كم³ م م² م³ سم سم² سم³ مم مم² مم³ كم غرام جرام جم كغ ملغ كوب اكواب"
|
"كم كم² كم³ م م² م³ سم سم² سم³ مم مم² مم³ كم غرام جرام جم كغ ملغ كوب اكواب"
|
||||||
)
|
)
|
||||||
_currency = r"\$ £ € ¥ ฿ US\$ C\$ A\$ ₽ ﷼ ₴"
|
_currency = (
|
||||||
|
r"\$ £ € ¥ ฿ US\$ C\$ A\$ ₽ ﷼ ₴ ₠ ₡ ₢ ₣ ₤ ₥ ₦ ₧ ₨ ₩ ₪ ₫ € ₭ ₮ ₯ ₰ "
|
||||||
|
r"₱ ₲ ₳ ₴ ₵ ₶ ₷ ₸ ₹ ₺ ₻ ₼ ₽ ₾ ₿"
|
||||||
|
)
|
||||||
|
|
||||||
# These expressions contain various unicode variations, including characters
|
# These expressions contain various unicode variations, including characters
|
||||||
# used in Chinese (see #1333, #1340, #1351) – unless there are cross-language
|
# used in Chinese (see #1333, #1340, #1351) – unless there are cross-language
|
||||||
|
|
|
@ -336,8 +336,8 @@ def test_gold_biluo_additional_whitespace(en_vocab, en_tokenizer):
|
||||||
|
|
||||||
|
|
||||||
def test_gold_biluo_4791(en_vocab, en_tokenizer):
|
def test_gold_biluo_4791(en_vocab, en_tokenizer):
|
||||||
doc = en_tokenizer("I'll return the ₹54 amount")
|
doc = en_tokenizer("I'll return the A54 amount")
|
||||||
gold_words = ["I", "'ll", "return", "the", "₹", "54", "amount"]
|
gold_words = ["I", "'ll", "return", "the", "A", "54", "amount"]
|
||||||
gold_spaces = [False, True, True, True, False, True, False]
|
gold_spaces = [False, True, True, True, False, True, False]
|
||||||
entities = [(16, 19, "MONEY")]
|
entities = [(16, 19, "MONEY")]
|
||||||
example = Example.from_dict(
|
example = Example.from_dict(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user