mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Adding unit test for tokenization in French (with title)
This commit is contained in:
parent
ad8129cb45
commit
44cb486849
|
@ -38,3 +38,26 @@ def test_tokenizer_handles_exc_in_text_2(fr_tokenizer):
|
|||
assert len(tokens) == 11
|
||||
assert tokens[1].text == "après-midi"
|
||||
assert tokens[9].text == "italo-mexicain"
|
||||
|
||||
def test_tokenizer_handles_title(fr_tokenizer):
    """Check that a title-cased elided clitic ("N'") splits correctly.

    "N'est-ce pas génial?" should tokenize so that the capitalized
    elision "N'" and the inverted clitic "-ce" are separate tokens,
    each lemmatized to its canonical lowercase form.
    """
    tokens = fr_tokenizer("N'est-ce pas génial?")
    assert len(tokens) == 6
    # Capitalized elided negation particle keeps its surface form ...
    assert tokens[0].text == "N'"
    # ... but lemmatizes to the canonical "ne".
    assert tokens[0].lemma_ == "ne"
    # Hyphenated inverted subject pronoun is split off as its own token.
    assert tokens[2].text == "-ce"
    assert tokens[2].lemma_ == "ce"
|
||||
|
||||
def test_tokenizer_handles_title_2(fr_tokenizer):
    """Check lemmatization of a title-cased inflected verb ("Est")."""
    tokens = fr_tokenizer("Est-ce pas génial?")
    assert len(tokens) == 6
    # Sentence-initial capitalized verb form is preserved as-is ...
    assert tokens[0].text == "Est"
    # ... and lemmatized back to the infinitive "être".
    assert tokens[0].lemma_ == "être"
|
||||
|
||||
def test_tokenizer_handles_title_3(fr_tokenizer):
    """Check that a title-cased elided "Qu'" splits and lemmatizes.

    NOTE: renamed from ``test_tokenizer_handles_title_2`` — the module
    already defines a function with that name, and the duplicate
    definition shadowed the earlier test so pytest never collected it.
    """
    tokens = fr_tokenizer("Qu'est-ce que tu fais?")
    assert len(tokens) == 7
    # Capitalized elision "Qu'" keeps its surface form ...
    assert tokens[0].text == "Qu'"
    # ... but lemmatizes to the canonical "que".
    assert tokens[0].lemma_ == "que"
|
Loading…
Reference in New Issue
Block a user