Add tests for corner cases

This commit is contained in:
richardpaulhudson 2023-01-10 09:45:32 +01:00
parent 8d487bf35c
commit 781661ba38

View File

@ -101,14 +101,15 @@ def test_initialize_from_labels():
} }
def test_no_data(): @pytest.mark.parametrize("top_k", (1, 5, 30))
def test_no_data(top_k):
# Test that the lemmatizer provides a nice error when there's no tagging data / labels # Test that the lemmatizer provides a nice error when there's no tagging data / labels
TEXTCAT_DATA = [ TEXTCAT_DATA = [
("I'm so happy.", {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}), ("I'm so happy.", {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}),
("I'm so angry", {"cats": {"POSITIVE": 0.0, "NEGATIVE": 1.0}}), ("I'm so angry", {"cats": {"POSITIVE": 0.0, "NEGATIVE": 1.0}}),
] ]
nlp = English() nlp = English()
nlp.add_pipe("trainable_lemmatizer") nlp.add_pipe("trainable_lemmatizer", config={"top_k": top_k})
nlp.add_pipe("textcat") nlp.add_pipe("textcat")
train_examples = [] train_examples = []
@ -119,10 +120,11 @@ def test_no_data():
nlp.initialize(get_examples=lambda: train_examples) nlp.initialize(get_examples=lambda: train_examples)
def test_incomplete_data(): @pytest.mark.parametrize("top_k", (1, 5, 30))
def test_incomplete_data(top_k):
# Test that the lemmatizer works with incomplete information # Test that the lemmatizer works with incomplete information
nlp = English() nlp = English()
lemmatizer = nlp.add_pipe("trainable_lemmatizer") lemmatizer = nlp.add_pipe("trainable_lemmatizer", config={"top_k": top_k})
lemmatizer.min_tree_freq = 1 lemmatizer.min_tree_freq = 1
train_examples = [] train_examples = []
for t in PARTIAL_DATA: for t in PARTIAL_DATA: