# spaCy/spacy/tests/lang/lb/test_exceptions.py

# coding: utf-8
from __future__ import unicode_literals

import pytest


@pytest.mark.parametrize("text", ["z.B.", "Jan."])
def test_lb_tokenizer_handles_abbr(lb_tokenizer, text):
    """Check that each parametrized abbreviation is kept as one token.

    The trailing period must not be split off: tokenizing the input has to
    yield exactly one token.
    """
    doc = lb_tokenizer(text)
    token_count = len(doc)
    assert token_count == 1


@pytest.mark.parametrize("text", ["d'Saach", "d'Kanner", "d’Welt", "d’Suen"])
def test_lb_tokenizer_splits_contractions(lb_tokenizer, text):
    """Check that each parametrized contraction is split into two tokens.

    The inputs cover both the straight and the typographic apostrophe.
    """
    doc = lb_tokenizer(text)
    token_count = len(doc)
    assert token_count == 2


def test_lb_tokenizer_handles_exc_in_text(lb_tokenizer):
    """Check tokenizer exceptions inside a full sentence.

    The sentence must produce nine tokens, and the second token must be the
    clitic "'t" with the lemma "et".
    """
    doc = lb_tokenizer("Mee 't ass net evident, d'Liewen.")
    assert len(doc) == 9
    # Index only after the length check, mirroring the assertion order.
    clitic = doc[1]
    assert clitic.text == "'t"
    assert clitic.lemma_ == "et"
-												Initial commit: New language Luxembourgish (lb) (#4424)

* new language: Luxembourgish (lb)

* update

* update

* Update and rename .github/CONTRIBUTOR_AGREEMENT.md to .github/contributors/PeterGilles.md

* Update and rename .github/contributors/PeterGilles.md to .github/CONTRIBUTOR_AGREEMENT.md

* Update norm_exceptions.py

* Delete README.md

* moved test_lemma.py

* deactivated 'lemma_lookup = LOOKUP'

* update

* Update conftest.py

* update

* tests updated

* import unicode_literals

* Update spacy/tests/lang/lb/test_text.py

Co-Authored-By: Ines Montani <ines@ines.io>

* Create PeterGilles.md

											
										
										
											2019-10-14 13:27:50 +03:00
+								# coding: utf-8
 								from __future__ import unicode_literals
 								import pytest
-												Tidy up and auto-format

											
										
										
											2019-12-21 21:04:17 +03:00
-												Initial commit: New language Luxembourgish (lb) (#4424)

* new language: Luxembourgish (lb)

* update

* update

* Update and rename .github/CONTRIBUTOR_AGREEMENT.md to .github/contributors/PeterGilles.md

* Update and rename .github/contributors/PeterGilles.md to .github/CONTRIBUTOR_AGREEMENT.md

* Update norm_exceptions.py

* Delete README.md

* moved test_lemma.py

* deactivated 'lemma_lookup = LOOKUP'

* update

* Update conftest.py

* update

* tests updated

* import unicode_literals

* Update spacy/tests/lang/lb/test_text.py

Co-Authored-By: Ines Montani <ines@ines.io>

* Create PeterGilles.md

											
										
										
											2019-10-14 13:27:50 +03:00
+								@pytest.mark.parametrize("text", ["z.B.", "Jan."])
 								def test_lb_tokenizer_handles_abbr(lb_tokenizer, text):
 								    tokens = lb_tokenizer(text)
 								    assert len(tokens) == 1
-												new tests & tokenization fixes (#4734)

- added some tests for tokenization issues
- fixed some issues with tokenization of words with hyphen infix
- rewrote the "tokenizer_exceptions.py" file (stemming from the German version)
											
										
										
											2019-12-02 01:08:21 +03:00
-												Tidy up and auto-format

											
										
										
											2019-12-21 21:04:17 +03:00
-												new tests & tokenization fixes (#4734)

- added some tests for tokenization issues
- fixed some issues with tokenization of words with hyphen infix
- rewrote the "tokenizer_exceptions.py" file (stemming from the German version)
											
										
										
											2019-12-02 01:08:21 +03:00
+								@pytest.mark.parametrize("text", ["d'Saach", "d'Kanner", "d’Welt", "d’Suen"])
 								def test_lb_tokenizer_splits_contractions(lb_tokenizer, text):
 								    tokens = lb_tokenizer(text)
 								    assert len(tokens) == 2
-												Tidy up and auto-format

											
										
										
											2019-12-21 21:04:17 +03:00
-												new tests & tokenization fixes (#4734)

- added some tests for tokenization issues
- fixed some issues with tokenization of words with hyphen infix
- rewrote the "tokenizer_exceptions.py" file (stemming from the German version)
											
										
										
											2019-12-02 01:08:21 +03:00
+								def test_lb_tokenizer_handles_exc_in_text(lb_tokenizer):
 								    text = "Mee 't ass net evident, d'Liewen."
 								    tokens = lb_tokenizer(text)
 								    assert len(tokens) == 9
 								    assert tokens[1].text == "'t"
 								    assert tokens[1].lemma_ == "et"