From d37dca72dddf7574a73439ffb16814b40094afee Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Sun, 7 Jun 2015 16:49:46 +0200
Subject: [PATCH] * Reorganize tests

---
 tests/_depr_group_by.py                       |  35 ----
 tests/depr_test_ner.py                        | 156 ------------------
 tests/{ => munge}/test_align.py               |   0
 tests/{ => munge}/test_detokenize.py          |   0
 tests/{ => munge}/test_lev_align.py           |   0
 tests/{ => munge}/test_onto_ner.py            |   0
 tests/{ => munge}/test_onto_sgml_extract.py   |   0
 tests/{ => munge}/test_read_ptb.py            |   0
 tests/my_test.py                              |   0
 tests/{ => parser}/test_conjuncts.py          |   9 +-
 tests/{ => parser}/test_ner.py                |   0
 tests/{ => parser}/test_parse.py              |   0
 tests/{ => parser}/test_parse_navigate.py     |   0
 tests/{ => parser}/test_sbd.py                |   0
 tests/{ => parser}/test_subtree.py            |   0
 tests/{ => spans}/test_merge.py               |   0
 tests/{ => spans}/test_span.py                |   0
 tests/{ => spans}/test_times.py               |   0
 tests/{ => tagger}/test_add_lemmas.py         |   0
 tests/{ => tagger}/test_lemmatizer.py         |   0
 tests/{ => tagger}/test_morph_exceptions.py   |   0
 tests/{ => tagger}/test_tag_names.py          |   0
 tests/{ => tokenizer}/test_contractions.py    |   0
 tests/{ => tokenizer}/test_emoticons.py       |   0
 tests/{ => tokenizer}/test_indices.py         |   0
 tests/{ => tokenizer}/test_infix.py           |   0
 tests/{ => tokenizer}/test_only_punct.py      |   0
 tests/{ => tokenizer}/test_post_punct.py      |   0
 tests/{ => tokenizer}/test_pre_punct.py       |   0
 tests/{ => tokenizer}/test_special_affix.py   |   0
 tests/{ => tokenizer}/test_surround_punct.py  |   0
 tests/{ => tokenizer}/test_tokenizer.py       |   0
 .../{ => tokenizer}/test_tokens_from_list.py  |   0
 tests/{ => tokenizer}/test_whitespace.py      |   0
 tests/{ => tokens}/test_array.py              |   0
 tests/{ => tokens}/test_token.py              |   0
 tests/{ => tokens}/test_token_api.py          |   0
 tests/{ => tokens}/test_token_references.py   |   0
 tests/{ => tokens}/test_tokens_api.py         |   0
 tests/{ => tokens}/test_vec.py                |   0
 tests/{ => vocab}/test_asciify.py             |   0
 tests/{ => vocab}/test_flag_features.py       |   0
 tests/{ => vocab}/test_intern.py              |   0
 tests/{ => vocab}/test_is_punct.py            |   0
 tests/{ => vocab}/test_iter_lexicon.py        |   0
 tests/{ => vocab}/test_lexeme_flags.py        |   0
 tests/{ => vocab}/test_number.py              |   0
 tests/{ => vocab}/test_shape.py               |   0
 tests/{ => vocab}/test_string_loading.py      |   0
 tests/{ => vocab}/test_urlish.py              |   0
 tests/{ => vocab}/test_vocab.py               |   0
 51 files changed, 6 insertions(+), 194 deletions(-)
 delete mode 100644 tests/_depr_group_by.py
 delete mode 100644 tests/depr_test_ner.py
 rename tests/{ => munge}/test_align.py (100%)
 rename tests/{ => munge}/test_detokenize.py (100%)
 rename tests/{ => munge}/test_lev_align.py (100%)
 rename tests/{ => munge}/test_onto_ner.py (100%)
 rename tests/{ => munge}/test_onto_sgml_extract.py (100%)
 rename tests/{ => munge}/test_read_ptb.py (100%)
 delete mode 100644 tests/my_test.py
 rename tests/{ => parser}/test_conjuncts.py (79%)
 rename tests/{ => parser}/test_ner.py (100%)
 rename tests/{ => parser}/test_parse.py (100%)
 rename tests/{ => parser}/test_parse_navigate.py (100%)
 rename tests/{ => parser}/test_sbd.py (100%)
 rename tests/{ => parser}/test_subtree.py (100%)
 rename tests/{ => spans}/test_merge.py (100%)
 rename tests/{ => spans}/test_span.py (100%)
 rename tests/{ => spans}/test_times.py (100%)
 rename tests/{ => tagger}/test_add_lemmas.py (100%)
 rename tests/{ => tagger}/test_lemmatizer.py (100%)
 rename tests/{ => tagger}/test_morph_exceptions.py (100%)
 rename tests/{ => tagger}/test_tag_names.py (100%)
 rename tests/{ => tokenizer}/test_contractions.py (100%)
 rename tests/{ => tokenizer}/test_emoticons.py (100%)
 rename tests/{ => tokenizer}/test_indices.py (100%)
 rename tests/{ => tokenizer}/test_infix.py (100%)
 rename tests/{ => tokenizer}/test_only_punct.py (100%)
 rename tests/{ => tokenizer}/test_post_punct.py (100%)
 rename tests/{ => tokenizer}/test_pre_punct.py (100%)
 rename tests/{ => tokenizer}/test_special_affix.py (100%)
 rename tests/{ => tokenizer}/test_surround_punct.py (100%)
 rename tests/{ => tokenizer}/test_tokenizer.py (100%)
 rename tests/{ => tokenizer}/test_tokens_from_list.py (100%)
 rename tests/{ => tokenizer}/test_whitespace.py (100%)
 rename tests/{ => tokens}/test_array.py (100%)
 rename tests/{ => tokens}/test_token.py (100%)
 rename tests/{ => tokens}/test_token_api.py (100%)
 rename tests/{ => tokens}/test_token_references.py (100%)
 rename tests/{ => tokens}/test_tokens_api.py (100%)
 rename tests/{ => tokens}/test_vec.py (100%)
 rename tests/{ => vocab}/test_asciify.py (100%)
 rename tests/{ => vocab}/test_flag_features.py (100%)
 rename tests/{ => vocab}/test_intern.py (100%)
 rename tests/{ => vocab}/test_is_punct.py (100%)
 rename tests/{ => vocab}/test_iter_lexicon.py (100%)
 rename tests/{ => vocab}/test_lexeme_flags.py (100%)
 rename tests/{ => vocab}/test_number.py (100%)
 rename tests/{ => vocab}/test_shape.py (100%)
 rename tests/{ => vocab}/test_string_loading.py (100%)
 rename tests/{ => vocab}/test_urlish.py (100%)
 rename tests/{ => vocab}/test_vocab.py (100%)

diff --git a/tests/_depr_group_by.py b/tests/_depr_group_by.py
deleted file mode 100644
index 9f83c5ce9..000000000
--- a/tests/_depr_group_by.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from __future__ import unicode_literals
-
-import pytest
-
-from spacy import en
-from spacy.lexeme import lex_of
-
-from spacy import LEX, NORM, SHAPE, LAST3
-
-
-def test_group_by_lex():
-    tokens = en.tokenize("I like the red one and I like the blue one")
-    names, hashes, groups = tokens.group_by(LEX)
-
-    assert len(groups[0]) == 2
-    assert en.unhash(lex_of(groups[0][0])) == 'I'
-    assert names[0] == 'I'
-    assert len(groups[1]) == 2
-    assert en.unhash(lex_of(groups[1][0])) == 'like'
-    assert names[1] == "like"
-    assert len(groups[2]) == 2
-    assert len(groups[3]) == 1
-
-
-def test_group_by_last3():
-    tokens = en.tokenize("I the blithe swarthy mate ate on the filthy deck")
-    names, hashes, groups = tokens.group_by(LAST3)
-
-    assert len(groups[0]) == 1
-    assert en.unhash(lex_of(groups[0][0])) == 'I'
-    assert len(groups[1]) == 3
-    assert en.unhash(lex_of(groups[1][0])) == 'the'
-    assert len(groups[2]) == 2
-    assert len(groups[3]) == 2
-    assert len(groups[4]) == 1
diff --git a/tests/depr_test_ner.py b/tests/depr_test_ner.py
deleted file mode 100644
index 80e643dd1..000000000
--- a/tests/depr_test_ner.py
+++ /dev/null
@@ -1,156 +0,0 @@
-from __future__ import unicode_literals
-
-from spacy.ner.pystate import PyState
-import pytest
-
-
-@pytest.fixture
-def labels():
-    ent_types = ['LOC', 'MISC', 'ORG', 'PER']
-    moves = ['B', 'I', 'L', 'U']
-    labels = ['NULL', 'EOL', 'O']
-    for move in moves:
-        for ent_type in ent_types:
-            labels.append('%s-%s' % (move, ent_type))
-    return labels
-
-
-@pytest.fixture
-def sentence():
-    return "Ms. Haag plays Elianti .".split()
-
-
-@pytest.fixture
-def state(labels, sentence):
-    return PyState(labels, len(sentence))
-
-
-def test_begin(state, sentence):
-    assert state.n_ents == 0
-    assert state.i == 0
-    state.transition('B-PER')
-    assert state.n_ents == 0
-    assert state.i == 1
-    assert state.open_entity
-    assert state.ent == {'start': 0, 'label': 4, 'end': 0}
-    assert state.is_valid('I-PER')
-    assert not state.is_valid('I-LOC')
-    assert state.is_valid('L-PER')
-    assert not state.is_valid('L-LOC')
-    assert not state.is_valid('O')
-    assert not state.is_valid('U-PER')
-
-
-def test_in(state, sentence):
-    state.transition('B-PER')
-    assert state.n_ents == 0
-    state.transition('I-PER')
-    assert state.n_ents == 0
-    assert state.i == 2
-    assert state.is_valid('I-PER')
-    assert state.is_valid('L-PER')
-    assert not state.is_valid('B-PER')
-    assert not state.is_valid('I-LOC')
-    assert not state.is_valid('L-LOC')
-    assert not state.is_valid('U-PER')
-    assert not state.is_valid('O')
-
-
-def test_last(state, sentence):
-    state.transition('B-PER')
-    assert state.n_ents == 0
-    state.transition('L-PER')
-    assert state.n_ents == 1
-    assert state.i == 2
-    assert not state.open_entity
-    assert state.is_valid('B-PER')
-    assert state.is_valid('B-LOC')
-    assert state.is_valid('U-PER')
-    assert state.is_valid('U-LOC')
-    assert state.is_valid('O')
-    assert not state.is_valid('L-PER')
-    assert not state.is_valid('I-PER')
-
-
-def test_unit(state, sentence):
-    assert state.n_ents == 0
-    state.transition('U-PER')
-    assert state.n_ents == 1
-    assert state.i == 1
-    assert not state.open_entity
-    assert state.is_valid('B-PER')
-    assert state.is_valid('B-LOC')
-    assert state.is_valid('U-PER')
-    assert state.is_valid('U-LOC')
-    assert state.is_valid('O')
-    assert not state.is_valid('I-PER')
-    assert not state.is_valid('L-PER')
-
-
-def test_out(state, sentence):
-    assert state.n_ents == 0
-    state.transition('U-PER')
-    assert state.n_ents == 1
-    assert state.i == 1
-    state.transition('O')
-    assert state.i == 2
-    assert not state.open_entity
-    assert state.is_valid('B-PER')
-    assert state.is_valid('B-LOC')
-    assert state.is_valid('U-PER')
-    assert state.is_valid('U-LOC')
-    assert state.is_valid('O')
-    assert not state.is_valid('I-PER')
-    assert not state.is_valid('L-PER')
-
-
-@pytest.fixture
-def golds(sentence):
-    g = ['B-PER', 'L-PER', 'O', 'U-PER', 'O']
-    assert len(g) == len(sentence)
-    return g
-
-
-def test_oracle_gold(state, sentence, golds):
-    state.set_golds(golds)
-    assert state.is_gold('B-PER')
-    assert not state.is_gold('B-LOC')
-    assert not state.is_gold('I-PER')
-    assert not state.is_gold('L-PER')
-    assert not state.is_gold('U-PER')
-    assert not state.is_gold('O')
-    state.transition('B-PER')
-    assert state.is_gold('L-PER')
-    state.transition('L-PER')
-    assert state.is_gold('O')
-    assert not state.is_gold('B-PER')
-    state.transition('O')
-    assert not state.is_gold('B-PER')
-    assert not state.is_gold('O')
-    assert state.is_gold('U-PER')
-    state.transition('U-PER')
-    assert state.is_gold('O')
-    state.transition('O')
-    assert state.i == len(sentence)
-
-
-def test_oracle_miss_entity(state, sentence, golds):
-    state.set_golds(golds)
-    state.transition('O')
-    assert not state.is_gold('L-PER')
-    assert not state.is_gold('U-PER')
-    assert not state.is_gold('I-PER')
-    assert not state.is_gold('B-PER')
-    assert state.is_gold('O')
-    state.transition('O')
-    state.transition('O')
-    assert state.is_gold('U-PER')
-
-
-def test_oracle_extend_entity(state, sentence, golds):
-    state.set_golds(golds)
-    state.transition('B-PER')
-    assert not state.is_gold('I-PER')
-    state.transition('I-PER')
-    assert state.is_gold('L-PER')
-    assert not state.is_gold('I-PER')
diff --git a/tests/test_align.py b/tests/munge/test_align.py
similarity index 100%
rename from tests/test_align.py
rename to tests/munge/test_align.py
diff --git a/tests/test_detokenize.py b/tests/munge/test_detokenize.py
similarity index 100%
rename from tests/test_detokenize.py
rename to tests/munge/test_detokenize.py
diff --git a/tests/test_lev_align.py b/tests/munge/test_lev_align.py
similarity index 100%
rename from tests/test_lev_align.py
rename to tests/munge/test_lev_align.py
diff --git a/tests/test_onto_ner.py b/tests/munge/test_onto_ner.py
similarity index 100%
rename from tests/test_onto_ner.py
rename to tests/munge/test_onto_ner.py
diff --git a/tests/test_onto_sgml_extract.py b/tests/munge/test_onto_sgml_extract.py
similarity index 100%
rename from tests/test_onto_sgml_extract.py
rename to tests/munge/test_onto_sgml_extract.py
diff --git a/tests/test_read_ptb.py b/tests/munge/test_read_ptb.py
similarity index 100%
rename from tests/test_read_ptb.py
rename to tests/munge/test_read_ptb.py
diff --git a/tests/my_test.py b/tests/my_test.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/tests/test_conjuncts.py b/tests/parser/test_conjuncts.py
similarity index 79%
rename from tests/test_conjuncts.py
rename to tests/parser/test_conjuncts.py
index b6d7cc934..4f2ea5be3 100644
--- a/tests/test_conjuncts.py
+++ b/tests/parser/test_conjuncts.py
@@ -4,22 +4,25 @@ from __future__ import unicode_literals
 from spacy.en import English
 import pytest
 
-NLU = English()
 
 def orths(tokens):
     return [t.orth_ for t in tokens]
 
 
 def test_simple_two():
-    tokens = NLU('I lost money and pride.', tag=True, parse=False)
+    nlp = English()
+    tokens = nlp('I lost money and pride.', tag=True, parse=True)
     pride = tokens[4]
+    for t in tokens:  # debug dump of each token's text, tag and head text
+        print('%s %s %s' % (t.orth_, t.tag_, t.head.orth_))
     assert orths(pride.conjuncts) == ['money', 'pride']
     money = tokens[2]
     assert orths(money.conjuncts) == ['money', 'pride']
 
 
 def test_comma_three():
-    tokens = NLU('I found my wallet, phone and keys.')
+    nlp = English()
+    tokens = nlp('I found my wallet, phone and keys.')
     keys = tokens[-2]
     assert orths(keys.conjuncts) == ['wallet', 'phone', 'keys']
     wallet = tokens[3]
diff --git a/tests/test_ner.py b/tests/parser/test_ner.py
similarity index 100%
rename from tests/test_ner.py
rename to tests/parser/test_ner.py
diff --git a/tests/test_parse.py b/tests/parser/test_parse.py
similarity index 100%
rename from tests/test_parse.py
rename to tests/parser/test_parse.py
diff --git a/tests/test_parse_navigate.py b/tests/parser/test_parse_navigate.py
similarity index 100%
rename from tests/test_parse_navigate.py
rename to tests/parser/test_parse_navigate.py
diff --git a/tests/test_sbd.py b/tests/parser/test_sbd.py
similarity index 100%
rename from tests/test_sbd.py
rename to tests/parser/test_sbd.py
diff --git a/tests/test_subtree.py b/tests/parser/test_subtree.py
similarity index 100%
rename from tests/test_subtree.py
rename to tests/parser/test_subtree.py
diff --git a/tests/test_merge.py b/tests/spans/test_merge.py
similarity index 100%
rename from tests/test_merge.py
rename to tests/spans/test_merge.py
diff --git a/tests/test_span.py b/tests/spans/test_span.py
similarity index 100%
rename from tests/test_span.py
rename to tests/spans/test_span.py
diff --git a/tests/test_times.py b/tests/spans/test_times.py
similarity index 100%
rename from tests/test_times.py
rename to tests/spans/test_times.py
diff --git a/tests/test_add_lemmas.py b/tests/tagger/test_add_lemmas.py
similarity index 100%
rename from tests/test_add_lemmas.py
rename to tests/tagger/test_add_lemmas.py
diff --git a/tests/test_lemmatizer.py b/tests/tagger/test_lemmatizer.py
similarity index 100%
rename from tests/test_lemmatizer.py
rename to tests/tagger/test_lemmatizer.py
diff --git a/tests/test_morph_exceptions.py b/tests/tagger/test_morph_exceptions.py
similarity index 100%
rename from tests/test_morph_exceptions.py
rename to tests/tagger/test_morph_exceptions.py
diff --git a/tests/test_tag_names.py b/tests/tagger/test_tag_names.py
similarity index 100%
rename from tests/test_tag_names.py
rename to tests/tagger/test_tag_names.py
diff --git a/tests/test_contractions.py b/tests/tokenizer/test_contractions.py
similarity index 100%
rename from tests/test_contractions.py
rename to tests/tokenizer/test_contractions.py
diff --git a/tests/test_emoticons.py b/tests/tokenizer/test_emoticons.py
similarity index 100%
rename from tests/test_emoticons.py
rename to tests/tokenizer/test_emoticons.py
diff --git a/tests/test_indices.py b/tests/tokenizer/test_indices.py
similarity index 100%
rename from tests/test_indices.py
rename to tests/tokenizer/test_indices.py
diff --git a/tests/test_infix.py b/tests/tokenizer/test_infix.py
similarity index 100%
rename from tests/test_infix.py
rename to tests/tokenizer/test_infix.py
diff --git a/tests/test_only_punct.py b/tests/tokenizer/test_only_punct.py
similarity index 100%
rename from tests/test_only_punct.py
rename to tests/tokenizer/test_only_punct.py
diff --git a/tests/test_post_punct.py b/tests/tokenizer/test_post_punct.py
similarity index 100%
rename from tests/test_post_punct.py
rename to tests/tokenizer/test_post_punct.py
diff --git a/tests/test_pre_punct.py b/tests/tokenizer/test_pre_punct.py
similarity index 100%
rename from tests/test_pre_punct.py
rename to tests/tokenizer/test_pre_punct.py
diff --git a/tests/test_special_affix.py b/tests/tokenizer/test_special_affix.py
similarity index 100%
rename from tests/test_special_affix.py
rename to tests/tokenizer/test_special_affix.py
diff --git a/tests/test_surround_punct.py b/tests/tokenizer/test_surround_punct.py
similarity index 100%
rename from tests/test_surround_punct.py
rename to tests/tokenizer/test_surround_punct.py
diff --git a/tests/test_tokenizer.py b/tests/tokenizer/test_tokenizer.py
similarity index 100%
rename from tests/test_tokenizer.py
rename to tests/tokenizer/test_tokenizer.py
diff --git a/tests/test_tokens_from_list.py b/tests/tokenizer/test_tokens_from_list.py
similarity index 100%
rename from tests/test_tokens_from_list.py
rename to tests/tokenizer/test_tokens_from_list.py
diff --git a/tests/test_whitespace.py b/tests/tokenizer/test_whitespace.py
similarity index 100%
rename from tests/test_whitespace.py
rename to tests/tokenizer/test_whitespace.py
diff --git a/tests/test_array.py b/tests/tokens/test_array.py
similarity index 100%
rename from tests/test_array.py
rename to tests/tokens/test_array.py
diff --git a/tests/test_token.py b/tests/tokens/test_token.py
similarity index 100%
rename from tests/test_token.py
rename to tests/tokens/test_token.py
diff --git a/tests/test_token_api.py b/tests/tokens/test_token_api.py
similarity index 100%
rename from tests/test_token_api.py
rename to tests/tokens/test_token_api.py
diff --git a/tests/test_token_references.py b/tests/tokens/test_token_references.py
similarity index 100%
rename from tests/test_token_references.py
rename to tests/tokens/test_token_references.py
diff --git a/tests/test_tokens_api.py b/tests/tokens/test_tokens_api.py
similarity index 100%
rename from tests/test_tokens_api.py
rename to tests/tokens/test_tokens_api.py
diff --git a/tests/test_vec.py b/tests/tokens/test_vec.py
similarity index 100%
rename from tests/test_vec.py
rename to tests/tokens/test_vec.py
diff --git a/tests/test_asciify.py b/tests/vocab/test_asciify.py
similarity index 100%
rename from tests/test_asciify.py
rename to tests/vocab/test_asciify.py
diff --git a/tests/test_flag_features.py b/tests/vocab/test_flag_features.py
similarity index 100%
rename from tests/test_flag_features.py
rename to tests/vocab/test_flag_features.py
diff --git a/tests/test_intern.py b/tests/vocab/test_intern.py
similarity index 100%
rename from tests/test_intern.py
rename to tests/vocab/test_intern.py
diff --git a/tests/test_is_punct.py b/tests/vocab/test_is_punct.py
similarity index 100%
rename from tests/test_is_punct.py
rename to tests/vocab/test_is_punct.py
diff --git a/tests/test_iter_lexicon.py b/tests/vocab/test_iter_lexicon.py
similarity index 100%
rename from tests/test_iter_lexicon.py
rename to tests/vocab/test_iter_lexicon.py
diff --git a/tests/test_lexeme_flags.py b/tests/vocab/test_lexeme_flags.py
similarity index 100%
rename from tests/test_lexeme_flags.py
rename to tests/vocab/test_lexeme_flags.py
diff --git a/tests/test_number.py b/tests/vocab/test_number.py
similarity index 100%
rename from tests/test_number.py
rename to tests/vocab/test_number.py
diff --git a/tests/test_shape.py b/tests/vocab/test_shape.py
similarity index 100%
rename from tests/test_shape.py
rename to tests/vocab/test_shape.py
diff --git a/tests/test_string_loading.py b/tests/vocab/test_string_loading.py
similarity index 100%
rename from tests/test_string_loading.py
rename to tests/vocab/test_string_loading.py
diff --git a/tests/test_urlish.py b/tests/vocab/test_urlish.py
similarity index 100%
rename from tests/test_urlish.py
rename to tests/vocab/test_urlish.py
diff --git a/tests/test_vocab.py b/tests/vocab/test_vocab.py
similarity index 100%
rename from tests/test_vocab.py
rename to tests/vocab/test_vocab.py