Fix test formatting and consistency

2025-12-14 13:44:15 +03:00 · 2017-01-14 13:41:19 +01:00 · 2017-01-14 13:41:19 +01:00 · a89e269a5a
commit a89e269a5a
parent 3424e3a7e5
4 changed files with 16 additions and 16 deletions
--- a/spacy/tests/doc/test_doc_api.py
+++ b/spacy/tests/doc/test_doc_api.py
@@ -193,7 +193,7 @@ def test_doc_api_runtime_error(en_tokenizer):


 def test_doc_api_right_edge(en_tokenizer):
-    # Test for bug occurring from Unshift action, causing incorrect right edge
+    """Test for bug occurring from Unshift action, causing incorrect right edge"""
    text = "I have proposed to myself, for the sake of such as live under the government of the Romans, to translate those books into the Greek tongue."
    heads = [2, 1, 0, -1, -1, -3, 15, 1, -2, -1, 1, -3, -1, -1, 1, -2, -1, 1,
             -2, -7, 1, -19, 1, -2, -3, 2, 1, -3, -26]
@@ -202,7 +202,8 @@ def test_doc_api_right_edge(en_tokenizer):
    doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads)
    assert doc[6].text == 'for'
    subtree = [w.text for w in doc[6].subtree]
-    assert subtree == ['for' , 'the', 'sake', 'of', 'such', 'as', 'live', 'under', 'the', 'government', 'of', 'the', 'Romans', ',']
+    assert subtree == ['for', 'the', 'sake', 'of', 'such', 'as',
+                       'live', 'under', 'the', 'government', 'of', 'the', 'Romans', ',']
    assert doc[6].right_edge.text == ','


--- a/spacy/tests/en/test_punct.py
+++ b/spacy/tests/en/test_punct.py
@@ -10,9 +10,6 @@ from ...util import compile_prefix_regex
 from ...language_data import TOKENIZER_PREFIXES


-
-en_search_prefixes = compile_prefix_regex(TOKENIZER_PREFIXES).search
-
 PUNCT_OPEN = ['(', '[', '{', '*']
 PUNCT_CLOSE = [')', ']', '}', '*']
 PUNCT_PAIRED = [('(', ')'),  ('[', ']'), ('{', '}'), ('*', '*')]
@@ -99,7 +96,8 @@ def test_tokenizer_splits_double_end_quote(en_tokenizer, text):

@pytest.mark.parametrize('punct_open,punct_close', PUNCT_PAIRED)
@pytest.mark.parametrize('text', ["Hello"])
-def test_tokenizer_splits_open_close_punct(en_tokenizer, punct_open, punct_close, text):
+def test_tokenizer_splits_open_close_punct(en_tokenizer, punct_open,
+                                           punct_close, text):
    tokens = en_tokenizer(punct_open + text + punct_close)
    assert len(tokens) == 3
    assert tokens[0].text == punct_open
@@ -108,20 +106,22 @@ def test_tokenizer_splits_open_close_punct(en_tokenizer, punct_open, punct_close


@pytest.mark.parametrize('punct_open,punct_close', PUNCT_PAIRED)
-@pytest.mark.parametrize('punct_open_add,punct_close_add', [("`", "'")])
+@pytest.mark.parametrize('punct_open2,punct_close2', [("`", "'")])
@pytest.mark.parametrize('text', ["Hello"])
-def test_two_different(en_tokenizer, punct_open, punct_close, punct_open_add, punct_close_add, text):
-    tokens = en_tokenizer(punct_open_add + punct_open + text + punct_close + punct_close_add)
+def test_tokenizer_two_diff_punct(en_tokenizer, punct_open, punct_close,
+                                  punct_open2, punct_close2, text):
+    tokens = en_tokenizer(punct_open2 + punct_open + text + punct_close + punct_close2)
    assert len(tokens) == 5
-    assert tokens[0].text == punct_open_add
+    assert tokens[0].text == punct_open2
    assert tokens[1].text == punct_open
    assert tokens[2].text == text
    assert tokens[3].text == punct_close
-    assert tokens[4].text == punct_close_add
+    assert tokens[4].text == punct_close2


@pytest.mark.parametrize('text,punct', [("(can't", "(")])
 def test_tokenizer_splits_pre_punct_regex(text, punct):
+    en_search_prefixes = compile_prefix_regex(TOKENIZER_PREFIXES).search
    match = en_search_prefixes(text)
    assert match.group() == punct

--- a/spacy/tests/en/test_text.py
+++ b/spacy/tests/en/test_text.py
@@ -29,8 +29,7 @@ untimely death" of the rapier-tongued Scottish barrister and parliamentarian.
    ("""Yes! "I'd rather have a walk", Ms. Comble sighed. """, 15),
    ("""'Me too!', Mr. P. Delaware cried. """, 11),
    ("They ran about 10km.", 6),
-    # ("But then the 6,000-year ice age came...", 10)
-    ])
+    pytest.mark.xfail(("But then the 6,000-year ice age came...", 10))])
 def test_tokenizer_handles_cnts(en_tokenizer, text, length):
    tokens = en_tokenizer(text)
    assert len(tokens) == length