* Update contractions test to use the English class via a pytest fixture

2025-10-14 07:46:55 +03:00 · 2014-12-21 20:41:13 +11:00 · 2014-12-21 20:41:13 +11:00 · 199025609f
commit 199025609f
parent 0d9972f4b0
1 changed file with 23 additions and 18 deletions
--- a/tests/test_contractions.py
+++ b/tests/test_contractions.py
@@ -1,32 +1,37 @@
 from __future__ import unicode_literals
+import pytest

-from spacy.en import EN
+from spacy.en import English
+
+@pytest.fixture
+def EN():
+    return English(pos_tag=False)


-def test_possess():
-    tokens = EN.tokenize("Mike's")
-    assert EN.lexicon.strings[tokens[0].sic] == "Mike"
-    assert EN.lexicon.strings[tokens[1].sic] == "'s"
+def test_possess(EN):
+    tokens = EN("Mike's")
+    assert EN.vocab.strings[tokens[0].sic] == "Mike"
+    assert EN.vocab.strings[tokens[1].sic] == "'s"
    assert len(tokens) == 2


-def test_apostrophe():
-    tokens = EN.tokenize("schools'")
+def test_apostrophe(EN):
+    tokens = EN("schools'")
    assert len(tokens) == 2
    assert tokens[1].string == "'"
    assert tokens[0].string == "schools"


-def test_LL():
-    tokens = EN.tokenize("we'll")
+def test_LL(EN):
+    tokens = EN("we'll")
    assert len(tokens) == 2
    assert tokens[1].string == "'ll"
    assert tokens[1].lemma == "will"
    assert tokens[0].string == "we"


-def test_aint():
-    tokens = EN.tokenize("ain't")
+def test_aint(EN):
+    tokens = EN("ain't")
    assert len(tokens) == 2
    assert tokens[0].string == "ai"
    assert tokens[0].lemma == "be"
@@ -34,19 +39,19 @@ def test_aint():
    assert tokens[1].lemma == "not"


-def test_capitalized():
-    tokens = EN.tokenize("can't")
+def test_capitalized(EN):
+    tokens = EN("can't")
    assert len(tokens) == 2
-    tokens = EN.tokenize("Can't")
+    tokens = EN("Can't")
    assert len(tokens) == 2
-    tokens = EN.tokenize("Ain't")
+    tokens = EN("Ain't")
    assert len(tokens) == 2
    assert tokens[0].string == "Ai"
    assert tokens[0].lemma == "be"


-def test_punct():
-    tokens = EN.tokenize("We've")
+def test_punct(EN):
+    tokens = EN("We've")
    assert len(tokens) == 2
-    tokens = EN.tokenize("``We've")
+    tokens = EN("``We've")
    assert len(tokens) == 3