* More work on reorganising tests, using conftest.py

2025-12-16 22:54:18 +03:00 · 2015-06-07 18:02:24 +02:00 · 2015-06-07 18:02:24 +02:00 · 88041f69d1
commit 88041f69d1
parent 674ee5dde7
27 changed files with 94 additions and 271 deletions
--- a/tests/munge/test_onto_sgml_extract.py
+++ b/tests/munge/test_onto_sgml_extract.py
@ -1,31 +0,0 @@
 import pytest
 import os
 from os import path
 from spacy.munge.read_ontonotes import sgml_extract
 text_data = open(path.join(path.dirname(__file__), 'web_sample1.sgm')).read()
 def test_example_extract():
    article = sgml_extract(text_data)
    assert article['docid'] == 'blogspot.com_alaindewitt_20060924104100_ENG_20060924_104100'
    assert article['doctype'] == 'BLOG TEXT'
    assert article['datetime'] == '2006-09-24T10:41:00'
    assert article['headline'].strip() == 'Devastating Critique of the Arab World by One of Its Own'
    assert article['poster'] == 'Alain DeWitt'
    assert article['postdate'] == '2006-09-24T10:41:00'
    assert article['text'].startswith('Thanks again to my fri'), article['text'][:10]
    assert article['text'].endswith(' tide will turn."'), article['text'][-10:]
    assert '<' not in article['text'], article['text'][:10]
 def test_directory():
    context_dir = '/usr/local/data/OntoNotes5/data/english/metadata/context/wb/sel'
    for fn in os.listdir(context_dir):
        with open(path.join(context_dir, fn)) as file_:
            text = file_.read()
        article = sgml_extract(text)
--- a/tests/munge/test_read_ptb.py
+++ b/tests/munge/test_read_ptb.py
@ -1,46 +0,0 @@
 from spacy.munge import read_ptb
 import pytest
 from os import path
 ptb_loc = path.join(path.dirname(__file__), 'wsj_0001.parse')
 file3_loc = path.join(path.dirname(__file__), 'wsj_0003.parse')
@pytest.fixture
 def ptb_text():
    return open(path.join(ptb_loc)).read()
@pytest.fixture
 def sentence_strings(ptb_text):
    return read_ptb.split(ptb_text)
 def test_split(sentence_strings):
    assert len(sentence_strings) == 2
    assert sentence_strings[0].startswith('(TOP (S (NP-SBJ')
    assert sentence_strings[0].endswith('(. .)))')
    assert sentence_strings[1].startswith('(TOP (S (NP-SBJ')
    assert sentence_strings[1].endswith('(. .)))')
 def test_tree_read(sentence_strings):
    words, brackets = read_ptb.parse(sentence_strings[0])
    assert len(brackets) == 11
    string = ("Pierre Vinken , 61 years old , will join the board as a nonexecutive "
              "director Nov. 29 .")
    word_strings = string.split()
    starts = [s for l, s, e in brackets]
    ends = [e for l, s, e in brackets]
    assert min(starts) == 0
    assert max(ends) == len(words)
    assert brackets[-1] == ('S', 0, len(words))
    assert ('NP-SBJ', 0, 7) in brackets
 def test_traces():
    sent_strings = sentence_strings(open(file3_loc).read())
    words, brackets = read_ptb.parse(sent_strings[0])
    assert len(words) == 36
--- a/tests/parser/test_conjuncts.py
+++ b/tests/parser/test_conjuncts.py
@ -1,7 +1,6 @@
 """Test the Token.conjuncts property"""
 from __future__ import unicode_literals
 from spacy.en import English
 import pytest
@ -9,9 +8,8 @@ def orths(tokens):
    return [t.orth_ for t in tokens]
-def test_simple_two():
+def test_simple_two(EN):
-    nlp = English()
+    tokens = EN('I lost money and pride.', tag=True, parse=True)
    tokens = nlp('I lost money and pride.', tag=True, parse=True)
    pride = tokens[4]
    for t in tokens:
        print t.orth_, t.tag_, t.head.orth_
@ -20,13 +18,12 @@ def test_simple_two():
    assert orths(money.conjuncts) == ['money', 'pride']
-def test_comma_three():
+#def test_comma_three(EN):
-    nlp = English()
+#    tokens = EN('I found my wallet, phone and keys.')
-    tokens = nlp('I found my wallet, phone and keys.')
+#    keys = tokens[-2]
-    keys = tokens[-2]
+#    assert orths(keys.conjuncts) == ['wallet', 'phone', 'keys']
-    assert orths(keys.conjuncts) == ['wallet', 'phone', 'keys']
+#    wallet = tokens[3]
-    wallet = tokens[3]
+#    assert orths(wallet.conjuncts) == ['wallet', 'phone', 'keys']
    assert orths(wallet.conjuncts) == ['wallet', 'phone', 'keys']
 # This is failing due to parse errors
--- a/tests/parser/test_ner.py
+++ b/tests/parser/test_ner.py
@ -1,11 +1,6 @@
 from spacy.en import English
-
+def test_simple_types(EN):
-nlp = English()
+    tokens = EN(u'Mr. Best flew to New York on Saturday morning.')
 def test_simple_types():
    tokens = nlp(u'Mr. Best flew to New York on Saturday morning.')
    ents = list(tokens.ents)
    assert ents[0].start == 1
    assert ents[0].end == 2
--- a/tests/parser/test_parse.py
+++ b/tests/parser/test_parse.py
@ -1,11 +1,7 @@
 from spacy.en import English
 import pytest
 NLP = English()
-
+def test_root(EN):
-def test_root():
+    tokens = EN(u"i don't have other assistance")
    tokens = NLP(u"i don't have other assistance")
    for t in tokens:
        assert t.dep != 0, t.orth_
--- a/tests/parser/test_parse_navigate.py
+++ b/tests/parser/test_parse_navigate.py
@ -2,8 +2,6 @@ from __future__ import unicode_literals
 from os import path
 import codecs
 from spacy.en import English
 import pytest
@ -14,13 +12,8 @@ def sun_text():
    return text
-@pytest.fixture
+def test_consistency(EN, sun_text):
-def nlp():
+    tokens = EN(sun_text)
    return English()
 def test_consistency(nlp, sun_text):
    tokens = nlp(sun_text)
    for head in tokens:
        for child in head.lefts:
            assert child.head is head
@ -28,8 +21,8 @@ def test_consistency(nlp, sun_text):
            assert child.head is head
-def test_child_consistency(nlp, sun_text):
+def test_child_consistency(EN, sun_text):
-    tokens = nlp(sun_text)
+    tokens = EN(sun_text)
    lefts = {}
    rights = {}
@ -60,9 +53,9 @@ def test_child_consistency(nlp, sun_text):
        assert not children
-def test_edges(nlp):
+def test_edges(EN):
    sun_text = u"Chemically, about three quarters of the Sun's mass consists of hydrogen, while the rest is mostly helium."
-    tokens = nlp(sun_text)
+    tokens = EN(sun_text)
    for token in tokens:
        subtree = list(token.subtree)
        debug = '\t'.join((token.orth_, token.left_edge.orth_, subtree[0].orth_))
--- a/tests/parser/test_sbd.py
+++ b/tests/parser/test_sbd.py
@ -1,14 +1,8 @@
 from __future__ import unicode_literals
 from spacy.en import English
 import pytest
@pytest.fixture
 def EN():
    return English()
 def test_single_period(EN):
    string = 'A test sentence.'
    words = EN(string)
--- a/tests/parser/test_subtree.py
+++ b/tests/parser/test_subtree.py
@ -1,10 +1,7 @@
 from __future__ import unicode_literals
 from spacy.en import English
 EN = English()
-def test_subtrees():
+def test_subtrees(EN):
    sent = EN('The four wheels on the bus turned quickly')
    wheels = sent[2]
    bus = sent[5]
--- a/tests/spans/test_merge.py
+++ b/tests/spans/test_merge.py
@ -1,14 +1,8 @@
 from __future__ import unicode_literals
 import pytest
-from spacy.en import English
+def test_merge_tokens(en_nlp):
-
+    tokens = en_nlp(u'Los Angeles start.')
 NLU = English()
 def test_merge_tokens():
    tokens = NLU(u'Los Angeles start.')
    assert len(tokens) == 4
    assert tokens[0].head.orth_ == 'Angeles'
    assert tokens[1].head.orth_ == 'start'
@ -18,8 +12,8 @@ def test_merge_tokens():
    assert tokens[0].head.orth_ == 'start'
-def test_merge_heads():
+def test_merge_heads(en_nlp):
-    tokens = NLU(u'I found a pilates class near work.')
+    tokens = en_nlp(u'I found a pilates class near work.')
    assert len(tokens) == 8
    tokens.merge(tokens[3].idx, tokens[4].idx + len(tokens[4]), tokens[4].tag_,
                 'pilates class', 'O')
@ -34,4 +28,4 @@ def test_merge_heads():
 def test_issue_54():
    text = u'Talks given by women had a slightly higher number of questions asked (3.2$\pm$0.2) than talks given by men (2.6$\pm$0.1).'
-    tokens = NLU(text, merge_mwes=True)
+    tokens = en_nlp(text, merge_mwes=True)
--- a/tests/spans/test_span.py
+++ b/tests/spans/test_span.py
@ -1,16 +1,12 @@
 from __future__ import unicode_literals
 from spacy.en import English
 import pytest
@pytest.fixture
-def doc():
+def doc(en_nlp):
-    EN = English()
+    return en_nlp('This is a sentence. This is another sentence. And a third.')
    return EN('This is a sentence. This is another sentence. And a third.')
 def test_sent_spans(doc):
--- a/tests/spans/test_times.py
+++ b/tests/spans/test_times.py
@ -1,13 +1,9 @@
 from __future__ import unicode_literals
 from spacy.en import English
 import pytest
 NLU = English()
-
+def test_am_pm(en_nlp):
 def test_am_pm():
    numbers = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']
    variants = ['a.m.', 'am', 'p.m.', 'pm']
    spaces = ['', ' ']
@ -15,7 +11,7 @@ def test_am_pm():
        for var in variants:
            for space in spaces:
                string = u"The meeting was at %s%s%s wasn't it?" % (num, space, var)
-                tokens = NLU(string, merge_mwes=True)
+                tokens = en_nlp(string, merge_mwes=True)
                assert tokens[4].orth_ == '%s%s%s' % (num, space, var)
                ents = list(tokens.ents)
                assert len(ents) == 1
--- a/tests/tagger/test_add_lemmas.py
+++ b/tests/tagger/test_add_lemmas.py
@ -4,9 +4,6 @@ from spacy.en import English
 import pytest
@pytest.fixture
 def EN():
    return English()
@pytest.fixture
 def tagged(EN):
--- a/tests/tagger/test_morph_exceptions.py
+++ b/tests/tagger/test_morph_exceptions.py
@ -5,12 +5,6 @@ import pytest
 from spacy.en import English
@pytest.fixture
 def EN():
    return English()
@pytest.fixture
 def morph_exc():
    return {
@ -18,9 +12,11 @@ def morph_exc():
           }
-def test_load_exc(EN, morph_exc):
+def test_load_exc(morph_exc):
-    EN.tagger.load_morph_exceptions(morph_exc)
+    # Do this local as we want to modify it
-    tokens = EN('I like his style.', tag=True, parse=False)
+    nlp =  English()
    nlp.tagger.load_morph_exceptions(morph_exc)
    tokens = nlp('I like his style.', tag=True, parse=False)
    his = tokens[2]
    assert his.tag_ == 'PRP$'
    assert his.lemma_ == '-PRP-'
--- a/tests/tagger/test_tag_names.py
+++ b/tests/tagger/test_tag_names.py
@ -2,9 +2,8 @@ from spacy.en import English
 import six
-def test_tag_names():
+def test_tag_names(EN):
-    nlp = English()
+    tokens = EN(u'I ate pizzas with anchovies.', parse=False, tag=True)
    tokens = nlp(u'I ate pizzas with anchovies.', parse=True, tag=True)
    pizza = tokens[2]
    assert type(pizza.pos) == int
    assert isinstance(pizza.pos_, six.text_type)
--- a/tests/tokenizer/conftest.py
+++ b/tests/tokenizer/conftest.py
@ -2,10 +2,6 @@ import pytest
 from spacy.en import English
-@pytest.fixture(scope="session")
+@pytest.fixture(scope="module")
 def EN():
    return English(load_vectors=False)
@pytest.fixture(scope="session")
 def en_tokenizer(EN):
    return EN.tokenizer
--- a/tests/tokenizer/sun.txt
+++ b/tests/tokenizer/sun.txt
--- a/tests/tokenizer/test_string_loading.py
+++ b/tests/tokenizer/test_string_loading.py
@ -0,0 +1,9 @@
 """Test suspected freeing of strings"""
 from __future__ import unicode_literals
 def test_one(en_tokenizer):
    tokens = en_tokenizer('Betty Botter bought a pound of butter.')
    assert tokens[0].orth_ == 'Betty'
    tokens2 = en_tokenizer('Betty also bought a pound of butter.')
    assert tokens2[0].orth_ == 'Betty'
--- a/tests/tokenizer/test_wiki_sun.py
+++ b/tests/tokenizer/test_wiki_sun.py
@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 from spacy.en import English
 from spacy.util import utf8open
 import pytest
@ -16,8 +15,7 @@ def sun_txt():
    return utf8open(loc).read()
-def test_tokenize(sun_txt):
+def test_tokenize(sun_txt, EN):
    nlp = English()
    assert len(sun_txt) != 0
    tokens = nlp(sun_txt)
-    assert True
+    assert len(tokens) > 100
--- a/tests/tokens/test_array.py
+++ b/tests/tokens/test_array.py
@ -3,13 +3,10 @@ from __future__ import unicode_literals
 import pytest
 from spacy.en import English
 from spacy.en import attrs
-EN = English()
+def test_attr_of_token(EN):
 def test_attr_of_token():
    text = u'An example sentence.'
    tokens = EN(text, tag=True, parse=False)
    example = EN.vocab[u'example']
@ -18,7 +15,7 @@ def test_attr_of_token():
    assert feats_array[0][0] != feats_array[0][1]
-def test_tag():
+def test_tag(EN):
    text = u'A nice sentence.'
    tokens = EN(text)
    assert tokens[0].tag != tokens[1].tag != tokens[2].tag != tokens[3].tag
@ -29,7 +26,7 @@ def test_tag():
    assert feats_array[3][1] == tokens[3].tag
-def test_dep():
+def test_dep(EN):
    text = u'A nice sentence.'
    tokens = EN(text)
    feats_array = tokens.to_array((attrs.ORTH, attrs.DEP))
--- a/tests/tokens/test_token_api.py
+++ b/tests/tokens/test_token_api.py
@ -6,16 +6,10 @@ from spacy.en.attrs import IS_STOP
 import pytest
 nlp = English()
-
+def test_strings(EN):
-@pytest.fixture
+    tokens = EN(u'Give it back! He pleaded.')
-def token():
+    token = tokens[0]
    tokens = nlp(u'Give it back! He pleaded.')
    return tokens[0]
 def test_strings(token):
    assert token.orth_ == 'Give'
    assert token.lower_ == 'give'
    assert token.shape_ == 'Xxxx'
@ -27,13 +21,16 @@ def test_strings(token):
    assert token.dep_ == 'ROOT'
-def test_flags(token):
+def test_flags(EN):
    tokens = EN(u'Give it back! He pleaded.')
    token = tokens[0]
    assert token.check_flag(IS_ALPHA)
    assert not token.check_flag(IS_DIGIT)
    # TODO: Test more of these, esp. if a bug is found
-def test_single_token_string():
+def test_single_token_string(EN):
-    nlp = English()
+
-    tokens = nlp(u'foobar')
+    tokens = EN(u'foobar')
    assert tokens[0].string == 'foobar'
--- a/tests/tokens/test_token_references.py
+++ b/tests/tokens/test_token_references.py
@ -4,9 +4,11 @@ import gc
 from spacy.en import English
 # Let this have its own instances, as we have to be careful about memory here
 # that's the point, after all
 def get_orphan_token(text, i):
-    nlp = English()
+    nlp = English(load_vectors=False)
    tokens = nlp(text)
    gc.collect()
    token = tokens[i]
--- a/tests/tokens/test_tokens_api.py
+++ b/tests/tokens/test_tokens_api.py
@ -1,16 +1,10 @@
 from __future__ import unicode_literals
 from spacy.en import English
 import pytest
-@pytest.fixture
+def test_getitem(EN):
-def tokens():
+    tokens = EN(u'Give it back! He pleaded.')
    nlp = English()
    return nlp(u'Give it back! He pleaded.')
 def test_getitem(tokens):
    assert tokens[0].orth_ == 'Give'
    assert tokens[-1].orth_ == '.'
    with pytest.raises(IndexError):
--- a/tests/tokens/test_vec.py
+++ b/tests/tokens/test_vec.py
@ -5,11 +5,6 @@ from spacy.en import English
 import pytest
@pytest.fixture
 def EN():
    return English()
 def test_vec(EN):
    hype = EN.vocab['hype']
    assert hype.orth_ == 'hype'
--- a/tests/vocab/test_iter_lexicon.py
+++ b/tests/vocab/test_iter_lexicon.py
@ -1,19 +1,12 @@
 import pytest
-from spacy.en import English
+
 def test_range_iter(en_vocab):
    for i in range(len(en_vocab)):
        lex = en_vocab[i]
-@pytest.fixture
+def test_iter(en_vocab):
 def EN():
    return English()
 def test_range_iter(EN):
    for i in range(len(EN.vocab)):
        lex = EN.vocab[i]
 def test_iter(EN):
    i = 0
-    for lex in EN.vocab:
+    for lex in en_vocab:
        i += 1
--- a/tests/vocab/test_lexeme_flags.py
+++ b/tests/vocab/test_lexeme_flags.py
@ -2,28 +2,22 @@ from __future__ import unicode_literals
 import pytest
 from spacy.en import English
 from spacy.en.attrs import *
-@pytest.fixture
+def test_is_alpha(en_vocab):
-def EN():
+    the = en_vocab['the']
    return English()
 def test_is_alpha(EN):
    the = EN.vocab['the']
    assert the.flags & (1 << IS_ALPHA)
-    year = EN.vocab['1999']
+    year = en_vocab['1999']
    assert not year.flags & (1 << IS_ALPHA)
-    mixed = EN.vocab['hello1']
+    mixed = en_vocab['hello1']
    assert not mixed.flags & (1 << IS_ALPHA)
-def test_is_digit(EN):
+def test_is_digit(en_vocab):
-    the = EN.vocab['the']
+    the = en_vocab['the']
    assert not the.flags & (1 << IS_DIGIT)
-    year = EN.vocab['1999']
+    year = en_vocab['1999']
    assert year.flags & (1 << IS_DIGIT)
-    mixed = EN.vocab['hello1']
+    mixed = en_vocab['hello1']
    assert not mixed.flags & (1 << IS_DIGIT)
--- a/tests/vocab/test_string_loading.py
+++ b/tests/vocab/test_string_loading.py
@ -1,18 +0,0 @@
 """Test suspected freeing of strings"""
 from __future__ import unicode_literals
 import pytest
 from spacy.en import English
@pytest.fixture
 def EN():
    return English()
 def test_one(EN):
    tokens = EN('Betty Botter bought a pound of butter.')
    assert tokens[0].orth_ == 'Betty'
    tokens2 = EN('Betty also bought a pound of butter.')
    assert tokens2[0].orth_ == 'Betty'
--- a/tests/vocab/test_vocab.py
+++ b/tests/vocab/test_vocab.py
@ -1,34 +1,27 @@
 from __future__ import unicode_literals
 import pytest
-from spacy.en import English
+
 def test_neq(en_vocab):
    addr = en_vocab['Hello']
    assert en_vocab['bye'].orth != addr.orth
-@pytest.fixture
+def test_eq(en_vocab):
-def EN():
+    addr = en_vocab['Hello']
-    return English()
+    assert en_vocab['Hello'].orth == addr.orth
-def test_neq(EN):
+def test_case_neq(en_vocab):
-    addr = EN.vocab['Hello']
+    addr = en_vocab['Hello']
-    assert EN.vocab['bye'].orth != addr.orth
+    assert en_vocab['hello'].orth != addr.orth
-def test_eq(EN):
+def test_punct_neq(en_vocab):
-    addr = EN.vocab['Hello']
+    addr = en_vocab['Hello']
-    assert EN.vocab['Hello'].orth == addr.orth
+    assert en_vocab['Hello,'].orth != addr.orth
-def test_case_neq(EN):
+def test_shape_attr(en_vocab):
-    addr = EN.vocab['Hello']
+    example = en_vocab['example']
    assert EN.vocab['hello'].orth != addr.orth
 def test_punct_neq(EN):
    addr = EN.vocab['Hello']
    assert EN.vocab['Hello,'].orth != addr.orth
 def test_shape_attr(EN):
    example = EN.vocab['example']
    assert example.orth != example.shape