mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-22 10:02:01 +03:00
black format
This commit is contained in:
parent
82ce3cc3f0
commit
c1b4b6d5c6
|
@ -11,8 +11,7 @@ sentences = [
|
|||
"Berapa banyak pelajar yang akan menghadiri majlis perpisahan sekolah?",
|
||||
"Pengeluaran makanan berasal dari beberapa lokasi termasuk Cameron Highlands, Johor Bahru, dan Kuching.",
|
||||
"Syarikat XYZ telah menghasilkan 20,000 unit produk baharu dalam setahun terakhir",
|
||||
"Kuala Lumpur merupakan ibu negara Malaysia."
|
||||
"Kau berada di mana semalam?",
|
||||
"Kuala Lumpur merupakan ibu negara Malaysia." "Kau berada di mana semalam?",
|
||||
"Siapa yang akan memimpin projek itu?",
|
||||
"Siapa perdana menteri Malaysia sekarang?",
|
||||
]
|
||||
|
|
|
@ -30,7 +30,7 @@ _num_words = [
|
|||
"septilion",
|
||||
"oktilion",
|
||||
"nonilion",
|
||||
"desilion"
|
||||
"desilion",
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.parametrize("text", ["(Ma'arif)"])
|
||||
def test_ms_tokenizer_splits_no_special(id_tokenizer, text):
|
||||
tokens = id_tokenizer(text)
|
||||
|
@ -61,7 +62,8 @@ def test_ms_tokenizer_splits_uneven_wrap_interact(id_tokenizer, text):
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"text,length", [("kerana", 1), ("Mahathir-Anwar", 3), ("Tun Dr. Ismail-Abdul Rahman", 6)]
|
||||
"text,length",
|
||||
[("kerana", 1), ("Mahathir-Anwar", 3), ("Tun Dr. Ismail-Abdul Rahman", 6)],
|
||||
)
|
||||
def test_my_tokenizer_splits_hyphens(ms_tokenizer, text, length):
|
||||
tokens = ms_tokenizer(text)
|
||||
|
|
Loading…
Reference in New Issue
Block a user