Move alignment tests from munge to gold and modernise

2025-09-22 20:16:43 +03:00 · 2017-01-13 01:33:31 +01:00 · 2017-01-13 01:33:31 +01:00 · 3610d27967
commit 3610d27967
parent 094ff7396a
2 changed files with 36 additions and 42 deletions
--- a/spacy/tests/gold/test_lev_align.py
+++ b/spacy/tests/gold/test_lev_align.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+"""Find the min-cost alignment between two tokenizations"""
+
+from __future__ import unicode_literals
+
+from ...gold import _min_edit_path as min_edit_path
+from ...gold import align
+
+import pytest
+
+
+@pytest.mark.parametrize('cand,gold,path', [
+    (["U.S", ".", "policy"], ["U.S.", "policy"], (0, 'MDM')),
+    (["U.N", ".", "policy"], ["U.S.", "policy"], (1, 'SDM')),
+    (["The", "cat", "sat", "down"], ["The", "cat", "sat", "down"], (0, 'MMMM')),
+    (["cat", "sat", "down"], ["The", "cat", "sat", "down"], (1, 'IMMM')),
+    (["The", "cat", "down"], ["The", "cat", "sat", "down"], (1, 'MMIM')),
+    (["The", "cat", "sag", "down"], ["The", "cat", "sat", "down"], (1, 'MMSM'))])
+def test_gold_lev_align_edit_path(cand, gold, path):
+    assert min_edit_path(cand, gold) == path
+
+
+def test_gold_lev_align_edit_path2():
+    cand = ["your", "stuff"]
+    gold = ["you", "r", "stuff"]
+    assert min_edit_path(cand, gold) in [(2, 'ISM'), (2, 'SIM')]
+
+
+@pytest.mark.parametrize('cand,gold,result', [
+    (["U.S", ".", "policy"], ["U.S.", "policy"], [0, None, 1]),
+    (["your", "stuff"], ["you", "r", "stuff"], [None, 2]),
+    (["i", "like", "2", "guys", "   ", "well", "id", "just", "come", "straight", "out"],
+     ["i", "like", "2", "guys", "well", "i", "d", "just", "come", "straight", "out"],
+     [0, 1, 2, 3, None, 4, None, 7, 8, 9, 10])])
+def test_gold_lev_align(cand, gold, result):
+    assert align(cand, gold) == result
--- a/spacy/tests/munge/test_lev_align.py
+++ b/spacy/tests/munge/test_lev_align.py
@@ -1,42 +0,0 @@
-"""Find the min-cost alignment between two tokenizations"""
-from spacy.gold import _min_edit_path as min_edit_path
-from spacy.gold import align
-
-
-def test_edit_path():
-    cand = ["U.S", ".", "policy"]
-    gold = ["U.S.", "policy"]
-    assert min_edit_path(cand, gold) == (0, 'MDM')
-    cand = ["U.N", ".", "policy"]
-    gold = ["U.S.", "policy"]
-    assert min_edit_path(cand, gold) == (1, 'SDM')
-    cand = ["The", "cat", "sat", "down"]
-    gold = ["The", "cat", "sat", "down"]
-    assert min_edit_path(cand, gold) == (0, 'MMMM')
-    cand = ["cat", "sat", "down"]
-    gold = ["The", "cat", "sat", "down"]
-    assert min_edit_path(cand, gold) == (1, 'IMMM')
-    cand = ["The", "cat", "down"]
-    gold = ["The", "cat", "sat", "down"]
-    assert min_edit_path(cand, gold) == (1, 'MMIM')
-    cand = ["The", "cat", "sag", "down"]
-    gold = ["The", "cat", "sat", "down"]
-    assert min_edit_path(cand, gold) == (1, 'MMSM')
-    cand = ["your", "stuff"]
-    gold = ["you", "r", "stuff"]
-    assert min_edit_path(cand, gold) in [(2, 'ISM'), (2, 'SIM')]
-
-
-def test_align():
-    cand = ["U.S", ".", "policy"]
-    gold = ["U.S.", "policy"]
-    assert align(cand, gold) == [0, None, 1]
-    cand = ["your", "stuff"]
-    gold = ["you", "r", "stuff"]
-    assert align(cand, gold) == [None, 2]
-    cand = [u'i', u'like', u'2', u'guys', u'   ', u'well', u'id', u'just',
-            u'come', u'straight', u'out']
-    gold = [u'i', u'like', u'2', u'guys', u'well', u'i', u'd', u'just', u'come',
-            u'straight', u'out']
-    assert align(cand, gold) == [0, 1, 2, 3, None, 4, None, 7, 8, 9, 10]
-