From d37dca72dddf7574a73439ffb16814b40094afee Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 7 Jun 2015 16:49:46 +0200 Subject: [PATCH] * Reorganize tests --- tests/_depr_group_by.py | 35 ---- tests/depr_test_ner.py | 156 ------------------ tests/{ => munge}/test_align.py | 0 tests/{ => munge}/test_detokenize.py | 0 tests/{ => munge}/test_lev_align.py | 0 tests/{ => munge}/test_onto_ner.py | 0 tests/{ => munge}/test_onto_sgml_extract.py | 0 tests/{ => munge}/test_read_ptb.py | 0 tests/my_test.py | 0 tests/{ => parser}/test_conjuncts.py | 9 +- tests/{ => parser}/test_ner.py | 0 tests/{ => parser}/test_parse.py | 0 tests/{ => parser}/test_parse_navigate.py | 0 tests/{ => parser}/test_sbd.py | 0 tests/{ => parser}/test_subtree.py | 0 tests/{ => spans}/test_merge.py | 0 tests/{ => spans}/test_span.py | 0 tests/{ => spans}/test_times.py | 0 tests/{ => tagger}/test_add_lemmas.py | 0 tests/{ => tagger}/test_lemmatizer.py | 0 tests/{ => tagger}/test_morph_exceptions.py | 0 tests/{ => tagger}/test_tag_names.py | 0 tests/{ => tokenizer}/test_contractions.py | 0 tests/{ => tokenizer}/test_emoticons.py | 0 tests/{ => tokenizer}/test_indices.py | 0 tests/{ => tokenizer}/test_infix.py | 0 tests/{ => tokenizer}/test_only_punct.py | 0 tests/{ => tokenizer}/test_post_punct.py | 0 tests/{ => tokenizer}/test_pre_punct.py | 0 tests/{ => tokenizer}/test_special_affix.py | 0 tests/{ => tokenizer}/test_surround_punct.py | 0 tests/{ => tokenizer}/test_tokenizer.py | 0 .../{ => tokenizer}/test_tokens_from_list.py | 0 tests/{ => tokenizer}/test_whitespace.py | 0 tests/{ => tokens}/test_array.py | 0 tests/{ => tokens}/test_token.py | 0 tests/{ => tokens}/test_token_api.py | 0 tests/{ => tokens}/test_token_references.py | 0 tests/{ => tokens}/test_tokens_api.py | 0 tests/{ => tokens}/test_vec.py | 0 tests/{ => vocab}/test_asciify.py | 0 tests/{ => vocab}/test_flag_features.py | 0 tests/{ => vocab}/test_intern.py | 0 tests/{ => vocab}/test_is_punct.py | 0 tests/{ => vocab}/test_iter_lexicon.py | 0 tests/{ => vocab}/test_lexeme_flags.py | 0 tests/{ => vocab}/test_number.py | 0 tests/{ => vocab}/test_shape.py | 0 tests/{ => vocab}/test_string_loading.py | 0 tests/{ => vocab}/test_urlish.py | 0 tests/{ => vocab}/test_vocab.py | 0 51 files changed, 6 insertions(+), 194 deletions(-) delete mode 100644 tests/_depr_group_by.py delete mode 100644 tests/depr_test_ner.py rename tests/{ => munge}/test_align.py (100%) rename tests/{ => munge}/test_detokenize.py (100%) rename tests/{ => munge}/test_lev_align.py (100%) rename tests/{ => munge}/test_onto_ner.py (100%) rename tests/{ => munge}/test_onto_sgml_extract.py (100%) rename tests/{ => munge}/test_read_ptb.py (100%) delete mode 100644 tests/my_test.py rename tests/{ => parser}/test_conjuncts.py (79%) rename tests/{ => parser}/test_ner.py (100%) rename tests/{ => parser}/test_parse.py (100%) rename tests/{ => parser}/test_parse_navigate.py (100%) rename tests/{ => parser}/test_sbd.py (100%) rename tests/{ => parser}/test_subtree.py (100%) rename tests/{ => spans}/test_merge.py (100%) rename tests/{ => spans}/test_span.py (100%) rename tests/{ => spans}/test_times.py (100%) rename tests/{ => tagger}/test_add_lemmas.py (100%) rename tests/{ => tagger}/test_lemmatizer.py (100%) rename tests/{ => tagger}/test_morph_exceptions.py (100%) rename tests/{ => tagger}/test_tag_names.py (100%) rename tests/{ => tokenizer}/test_contractions.py (100%) rename tests/{ => tokenizer}/test_emoticons.py (100%) rename tests/{ => tokenizer}/test_indices.py (100%) rename tests/{ => tokenizer}/test_infix.py (100%) rename tests/{ => tokenizer}/test_only_punct.py (100%) rename tests/{ => tokenizer}/test_post_punct.py (100%) rename tests/{ => tokenizer}/test_pre_punct.py (100%) rename tests/{ => tokenizer}/test_special_affix.py (100%) rename tests/{ => tokenizer}/test_surround_punct.py (100%) rename tests/{ => tokenizer}/test_tokenizer.py (100%) rename tests/{ => tokenizer}/test_tokens_from_list.py (100%) rename tests/{ => tokenizer}/test_whitespace.py (100%) rename tests/{ => tokens}/test_array.py (100%) rename tests/{ => tokens}/test_token.py (100%) rename tests/{ => tokens}/test_token_api.py (100%) rename tests/{ => tokens}/test_token_references.py (100%) rename tests/{ => tokens}/test_tokens_api.py (100%) rename tests/{ => tokens}/test_vec.py (100%) rename tests/{ => vocab}/test_asciify.py (100%) rename tests/{ => vocab}/test_flag_features.py (100%) rename tests/{ => vocab}/test_intern.py (100%) rename tests/{ => vocab}/test_is_punct.py (100%) rename tests/{ => vocab}/test_iter_lexicon.py (100%) rename tests/{ => vocab}/test_lexeme_flags.py (100%) rename tests/{ => vocab}/test_number.py (100%) rename tests/{ => vocab}/test_shape.py (100%) rename tests/{ => vocab}/test_string_loading.py (100%) rename tests/{ => vocab}/test_urlish.py (100%) rename tests/{ => vocab}/test_vocab.py (100%) diff --git a/tests/_depr_group_by.py b/tests/_depr_group_by.py deleted file mode 100644 index 9f83c5ce9..000000000 --- a/tests/_depr_group_by.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import unicode_literals - -import pytest - -from spacy import en -from spacy.lexeme import lex_of - -from spacy import LEX, NORM, SHAPE, LAST3 - - -def test_group_by_lex(): - tokens = en.tokenize("I like the red one and I like the blue one") - names, hashes, groups = tokens.group_by(LEX) - - assert len(groups[0]) == 2 - assert en.unhash(lex_of(groups[0][0])) == 'I' - assert names[0] == 'I' - assert len(groups[1]) == 2 - assert en.unhash(lex_of(groups[1][0])) == 'like' - assert names[1] == "like" - assert len(groups[2]) == 2 - assert len(groups[3]) == 1 - - -def test_group_by_last3(): - tokens = en.tokenize("I the blithe swarthy mate ate on the filthy deck") - names, hashes, groups = tokens.group_by(LAST3) - - assert len(groups[0]) == 1 - assert en.unhash(lex_of(groups[0][0])) == 'I' - assert len(groups[1]) == 3 - assert en.unhash(lex_of(groups[1][0])) == 'the' - assert len(groups[2]) == 2 - assert len(groups[3]) == 2 - assert len(groups[4]) == 1 diff --git a/tests/depr_test_ner.py b/tests/depr_test_ner.py deleted file mode 100644 index 80e643dd1..000000000 --- a/tests/depr_test_ner.py +++ /dev/null @@ -1,156 +0,0 @@ -from __future__ import unicode_literals - -from spacy.ner.pystate import PyState -import pytest - - -@pytest.fixture -def labels(): - ent_types = ['LOC', 'MISC', 'ORG', 'PER'] - moves = ['B', 'I', 'L', 'U'] - labels = ['NULL', 'EOL', 'O'] - for move in moves: - for ent_type in ent_types: - labels.append('%s-%s' % (move, ent_type)) - return labels - - -@pytest.fixture -def sentence(): - return "Ms. Haag plays Elianti .".split() - - -@pytest.fixture -def state(labels, sentence): - return PyState(labels, len(sentence)) - - -def test_begin(state, sentence): - assert state.n_ents == 0 - assert state.i == 0 - state.transition('B-PER') - assert state.n_ents == 0 - assert state.i == 1 - assert state.open_entity - assert state.ent == {'start': 0, 'label': 4, 'end': 0} - assert state.is_valid('I-PER') - assert not state.is_valid('I-LOC') - assert state.is_valid('L-PER') - assert not state.is_valid('L-LOC') - assert not state.is_valid('O') - assert not state.is_valid('U-PER') - - -def test_in(state, sentence): - state.transition('B-PER') - assert state.n_ents == 0 - state.transition('I-PER') - assert state.n_ents == 0 - assert state.i == 2 - assert state.is_valid('I-PER') - assert state.is_valid('L-PER') - assert not state.is_valid('B-PER') - assert not state.is_valid('I-LOC') - assert not state.is_valid('L-LOC') - assert not state.is_valid('U-PER') - assert not state.is_valid('O') - - -def test_last(state, sentence): - state.transition('B-PER') - assert state.n_ents == 0 - state.transition('L-PER') - assert state.n_ents == 1 - assert state.i == 2 - assert not state.open_entity - assert state.is_valid('B-PER') - assert state.is_valid('B-LOC') - assert state.is_valid('U-PER') - assert state.is_valid('U-LOC') - assert state.is_valid('O') - assert not state.is_valid('L-PER') - assert not state.is_valid('I-PER') - - -def test_unit(state, sentence): - assert state.n_ents == 0 - state.transition('U-PER') - assert state.n_ents == 1 - assert state.i == 1 - assert not state.open_entity - assert state.is_valid('B-PER') - assert state.is_valid('B-LOC') - assert state.is_valid('U-PER') - assert state.is_valid('U-LOC') - assert state.is_valid('O') - assert not state.is_valid('I-PER') - assert not state.is_valid('L-PER') - - -def test_out(state, sentence): - assert state.n_ents == 0 - state.transition('U-PER') - assert state.n_ents == 1 - assert state.i == 1 - state.transition('O') - assert state.i == 2 - assert not state.open_entity - assert state.is_valid('B-PER') - assert state.is_valid('B-LOC') - assert state.is_valid('U-PER') - assert state.is_valid('U-LOC') - assert state.is_valid('O') - assert not state.is_valid('I-PER') - assert not state.is_valid('L-PER') - - -@pytest.fixture -def golds(sentence): - g = ['B-PER', 'L-PER', 'O', 'U-PER', 'O'] - assert len(g) == len(sentence) - return g - - -def test_oracle_gold(state, sentence, golds): - state.set_golds(golds) - assert state.is_gold('B-PER') - assert not state.is_gold('B-LOC') - assert not state.is_gold('I-PER') - assert not state.is_gold('L-PER') - assert not state.is_gold('U-PER') - assert not state.is_gold('O') - state.transition('B-PER') - assert state.is_gold('L-PER') - state.transition('L-PER') - assert state.is_gold('O') - assert not state.is_gold('B-PER') - state.transition('O') - assert not state.is_gold('B-PER') - assert not state.is_gold('O') - assert state.is_gold('U-PER') - state.transition('U-PER') - assert state.is_gold('O') - state.transition('O') - assert state.i == len(sentence) - - -def test_oracle_miss_entity(state, sentence, golds): - state.set_golds(golds) - state.transition('O') - assert not state.is_gold('L-PER') - assert not state.is_gold('U-PER') - assert not state.is_gold('I-PER') - assert not state.is_gold('B-PER') - assert state.is_gold('O') - state.transition('O') - state.transition('O') - assert state.is_gold('U-PER') - - -def test_oracle_extend_entity(state, sentence, golds): - state.set_golds(golds) - state.transition('B-PER') - assert not state.is_gold('I-PER') - state.transition('I-PER') - assert state.is_gold('L-PER') - assert not state.is_gold('I-PER') diff --git a/tests/test_align.py b/tests/munge/test_align.py similarity index 100% rename from tests/test_align.py rename to tests/munge/test_align.py diff --git a/tests/test_detokenize.py b/tests/munge/test_detokenize.py similarity index 100% rename from tests/test_detokenize.py rename to tests/munge/test_detokenize.py diff --git a/tests/test_lev_align.py b/tests/munge/test_lev_align.py similarity index 100% rename from tests/test_lev_align.py rename to tests/munge/test_lev_align.py diff --git a/tests/test_onto_ner.py b/tests/munge/test_onto_ner.py similarity index 100% rename from tests/test_onto_ner.py rename to tests/munge/test_onto_ner.py diff --git a/tests/test_onto_sgml_extract.py b/tests/munge/test_onto_sgml_extract.py similarity index 100% rename from tests/test_onto_sgml_extract.py rename to tests/munge/test_onto_sgml_extract.py diff --git a/tests/test_read_ptb.py b/tests/munge/test_read_ptb.py similarity index 100% rename from tests/test_read_ptb.py rename to tests/munge/test_read_ptb.py diff --git a/tests/my_test.py b/tests/my_test.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/test_conjuncts.py b/tests/parser/test_conjuncts.py similarity index 79% rename from tests/test_conjuncts.py rename to tests/parser/test_conjuncts.py index b6d7cc934..4f2ea5be3 100644 --- a/tests/test_conjuncts.py +++ b/tests/parser/test_conjuncts.py @@ -4,22 +4,25 @@ from __future__ import unicode_literals from spacy.en import English import pytest -NLU = English() def orths(tokens): return [t.orth_ for t in tokens] def test_simple_two(): - tokens = NLU('I lost money and pride.', tag=True, parse=False) + nlp = English() + tokens = nlp('I lost money and pride.', tag=True, parse=True) pride = tokens[4] + for t in tokens: + print t.orth_, t.tag_, t.head.orth_ assert orths(pride.conjuncts) == ['money', 'pride'] money = tokens[2] assert orths(money.conjuncts) == ['money', 'pride'] def test_comma_three(): - tokens = NLU('I found my wallet, phone and keys.') + nlp = English() + tokens = nlp('I found my wallet, phone and keys.') keys = tokens[-2] assert orths(keys.conjuncts) == ['wallet', 'phone', 'keys'] wallet = tokens[3] diff --git a/tests/test_ner.py b/tests/parser/test_ner.py similarity index 100% rename from tests/test_ner.py rename to tests/parser/test_ner.py diff --git a/tests/test_parse.py b/tests/parser/test_parse.py similarity index 100% rename from tests/test_parse.py rename to tests/parser/test_parse.py diff --git a/tests/test_parse_navigate.py b/tests/parser/test_parse_navigate.py similarity index 100% rename from tests/test_parse_navigate.py rename to tests/parser/test_parse_navigate.py diff --git a/tests/test_sbd.py b/tests/parser/test_sbd.py similarity index 100% rename from tests/test_sbd.py rename to tests/parser/test_sbd.py diff --git a/tests/test_subtree.py b/tests/parser/test_subtree.py similarity index 100% rename from tests/test_subtree.py rename to tests/parser/test_subtree.py diff --git a/tests/test_merge.py b/tests/spans/test_merge.py similarity index 100% rename from tests/test_merge.py rename to tests/spans/test_merge.py diff --git a/tests/test_span.py b/tests/spans/test_span.py similarity index 100% rename from tests/test_span.py rename to tests/spans/test_span.py diff --git a/tests/test_times.py b/tests/spans/test_times.py similarity index 100% rename from tests/test_times.py rename to tests/spans/test_times.py diff --git a/tests/test_add_lemmas.py b/tests/tagger/test_add_lemmas.py similarity index 100% rename from tests/test_add_lemmas.py rename to tests/tagger/test_add_lemmas.py diff --git a/tests/test_lemmatizer.py b/tests/tagger/test_lemmatizer.py similarity index 100% rename from tests/test_lemmatizer.py rename to tests/tagger/test_lemmatizer.py diff --git a/tests/test_morph_exceptions.py b/tests/tagger/test_morph_exceptions.py similarity index 100% rename from tests/test_morph_exceptions.py rename to tests/tagger/test_morph_exceptions.py diff --git a/tests/test_tag_names.py b/tests/tagger/test_tag_names.py similarity index 100% rename from tests/test_tag_names.py rename to tests/tagger/test_tag_names.py diff --git a/tests/test_contractions.py b/tests/tokenizer/test_contractions.py similarity index 100% rename from tests/test_contractions.py rename to tests/tokenizer/test_contractions.py diff --git a/tests/test_emoticons.py b/tests/tokenizer/test_emoticons.py similarity index 100% rename from tests/test_emoticons.py rename to tests/tokenizer/test_emoticons.py diff --git a/tests/test_indices.py b/tests/tokenizer/test_indices.py similarity index 100% rename from tests/test_indices.py rename to tests/tokenizer/test_indices.py diff --git a/tests/test_infix.py b/tests/tokenizer/test_infix.py similarity index 100% rename from tests/test_infix.py rename to tests/tokenizer/test_infix.py diff --git a/tests/test_only_punct.py b/tests/tokenizer/test_only_punct.py similarity index 100% rename from tests/test_only_punct.py rename to tests/tokenizer/test_only_punct.py diff --git a/tests/test_post_punct.py b/tests/tokenizer/test_post_punct.py similarity index 100% rename from tests/test_post_punct.py rename to tests/tokenizer/test_post_punct.py diff --git a/tests/test_pre_punct.py b/tests/tokenizer/test_pre_punct.py similarity index 100% rename from tests/test_pre_punct.py rename to tests/tokenizer/test_pre_punct.py diff --git a/tests/test_special_affix.py b/tests/tokenizer/test_special_affix.py similarity index 100% rename from tests/test_special_affix.py rename to tests/tokenizer/test_special_affix.py diff --git a/tests/test_surround_punct.py b/tests/tokenizer/test_surround_punct.py similarity index 100% rename from tests/test_surround_punct.py rename to tests/tokenizer/test_surround_punct.py diff --git a/tests/test_tokenizer.py b/tests/tokenizer/test_tokenizer.py similarity index 100% rename from tests/test_tokenizer.py rename to tests/tokenizer/test_tokenizer.py diff --git a/tests/test_tokens_from_list.py b/tests/tokenizer/test_tokens_from_list.py similarity index 100% rename from tests/test_tokens_from_list.py rename to tests/tokenizer/test_tokens_from_list.py diff --git a/tests/test_whitespace.py b/tests/tokenizer/test_whitespace.py similarity index 100% rename from tests/test_whitespace.py rename to tests/tokenizer/test_whitespace.py diff --git a/tests/test_array.py b/tests/tokens/test_array.py similarity index 100% rename from tests/test_array.py rename to tests/tokens/test_array.py diff --git a/tests/test_token.py b/tests/tokens/test_token.py similarity index 100% rename from tests/test_token.py rename to tests/tokens/test_token.py diff --git a/tests/test_token_api.py b/tests/tokens/test_token_api.py similarity index 100% rename from tests/test_token_api.py rename to tests/tokens/test_token_api.py diff --git a/tests/test_token_references.py b/tests/tokens/test_token_references.py similarity index 100% rename from tests/test_token_references.py rename to tests/tokens/test_token_references.py diff --git a/tests/test_tokens_api.py b/tests/tokens/test_tokens_api.py similarity index 100% rename from tests/test_tokens_api.py rename to tests/tokens/test_tokens_api.py diff --git a/tests/test_vec.py b/tests/tokens/test_vec.py similarity index 100% rename from tests/test_vec.py rename to tests/tokens/test_vec.py diff --git a/tests/test_asciify.py b/tests/vocab/test_asciify.py similarity index 100% rename from tests/test_asciify.py rename to tests/vocab/test_asciify.py diff --git a/tests/test_flag_features.py b/tests/vocab/test_flag_features.py similarity index 100% rename from tests/test_flag_features.py rename to tests/vocab/test_flag_features.py diff --git a/tests/test_intern.py b/tests/vocab/test_intern.py similarity index 100% rename from tests/test_intern.py rename to tests/vocab/test_intern.py diff --git a/tests/test_is_punct.py b/tests/vocab/test_is_punct.py similarity index 100% rename from tests/test_is_punct.py rename to tests/vocab/test_is_punct.py diff --git a/tests/test_iter_lexicon.py b/tests/vocab/test_iter_lexicon.py similarity index 100% rename from tests/test_iter_lexicon.py rename to tests/vocab/test_iter_lexicon.py diff --git a/tests/test_lexeme_flags.py b/tests/vocab/test_lexeme_flags.py similarity index 100% rename from tests/test_lexeme_flags.py rename to tests/vocab/test_lexeme_flags.py diff --git a/tests/test_number.py b/tests/vocab/test_number.py similarity index 100% rename from tests/test_number.py rename to tests/vocab/test_number.py diff --git a/tests/test_shape.py b/tests/vocab/test_shape.py similarity index 100% rename from tests/test_shape.py rename to tests/vocab/test_shape.py diff --git a/tests/test_string_loading.py b/tests/vocab/test_string_loading.py similarity index 100% rename from tests/test_string_loading.py rename to tests/vocab/test_string_loading.py diff --git a/tests/test_urlish.py b/tests/vocab/test_urlish.py similarity index 100% rename from tests/test_urlish.py rename to tests/vocab/test_urlish.py diff --git a/tests/test_vocab.py b/tests/vocab/test_vocab.py similarity index 100% rename from tests/test_vocab.py rename to tests/vocab/test_vocab.py