mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Move alignment tests from munge to gold and modernise
This commit is contained in:
parent
094ff7396a
commit
3610d27967
36
spacy/tests/gold/test_lev_align.py
Normal file
36
spacy/tests/gold/test_lev_align.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
# coding: utf-8
|
||||
"""Find the min-cost alignment between two tokenizations"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ...gold import _min_edit_path as min_edit_path
|
||||
from ...gold import align
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.parametrize('cand,gold,path', [
    (["U.S", ".", "policy"], ["U.S.", "policy"], (0, 'MDM')),
    (["U.N", ".", "policy"], ["U.S.", "policy"], (1, 'SDM')),
    (["The", "cat", "sat", "down"], ["The", "cat", "sat", "down"], (0, 'MMMM')),
    (["cat", "sat", "down"], ["The", "cat", "sat", "down"], (1, 'IMMM')),
    (["The", "cat", "down"], ["The", "cat", "sat", "down"], (1, 'MMIM')),
    (["The", "cat", "sag", "down"], ["The", "cat", "sat", "down"], (1, 'MMSM')),
])
def test_gold_lev_align_edit_path(cand, gold, path):
    """Check the edit path computed between two tokenizations.

    Each case pairs a candidate token list and a gold token list with the
    exact tuple expected back from ``min_edit_path``: an integer followed by
    a string of per-step operation codes (presumably M/S/I/D stand for
    match/substitute/insert/delete -- confirm against ``spacy.gold``).
    """
    computed = min_edit_path(cand, gold)
    assert computed == path
|
||||
|
||||
|
||||
def test_gold_lev_align_edit_path2():
    """Check an edit path for which two results are accepted.

    For "your stuff" against "you r stuff" the test tolerates either of two
    operation strings with the same leading integer, so it asserts
    membership rather than equality.
    """
    acceptable = [(2, 'ISM'), (2, 'SIM')]
    found = min_edit_path(["your", "stuff"], ["you", "r", "stuff"])
    assert found in acceptable
|
||||
|
||||
|
||||
@pytest.mark.parametrize('cand,gold,result', [
    (
        ["U.S", ".", "policy"],
        ["U.S.", "policy"],
        [0, None, 1],
    ),
    (
        ["your", "stuff"],
        ["you", "r", "stuff"],
        [None, 2],
    ),
    (
        ["i", "like", "2", "guys", " ", "well", "id", "just", "come", "straight", "out"],
        ["i", "like", "2", "guys", "well", "i", "d", "just", "come", "straight", "out"],
        [0, 1, 2, 3, None, 4, None, 7, 8, 9, 10],
    ),
])
def test_gold_lev_align(cand, gold, result):
    """Check the alignment list produced for a candidate/gold token pair.

    In every case the expected list has one entry per candidate token; each
    entry is either an integer or ``None`` (presumably the index of the
    matching gold token, with ``None`` for tokens that have no one-to-one
    counterpart -- confirm against ``spacy.gold.align``).
    """
    alignment = align(cand, gold)
    assert alignment == result
|
|
@ -1,42 +0,0 @@
|
|||
"""Find the min-cost alignment between two tokenizations"""
|
||||
from spacy.gold import _min_edit_path as min_edit_path
|
||||
from spacy.gold import align
|
||||
|
||||
|
||||
def test_edit_path():
    """Check ``min_edit_path`` against a series of tokenization pairs.

    The first six cases must match exactly; the last one accepts either of
    two results, so it is asserted separately by membership.
    """
    # (candidate tokens, gold tokens, exact expected result), checked in order.
    exact_cases = [
        (["U.S", ".", "policy"], ["U.S.", "policy"], (0, 'MDM')),
        (["U.N", ".", "policy"], ["U.S.", "policy"], (1, 'SDM')),
        (["The", "cat", "sat", "down"], ["The", "cat", "sat", "down"], (0, 'MMMM')),
        (["cat", "sat", "down"], ["The", "cat", "sat", "down"], (1, 'IMMM')),
        (["The", "cat", "down"], ["The", "cat", "sat", "down"], (1, 'MMIM')),
        (["The", "cat", "sag", "down"], ["The", "cat", "sat", "down"], (1, 'MMSM')),
    ]
    for cand_tokens, gold_tokens, expected in exact_cases:
        assert min_edit_path(cand_tokens, gold_tokens) == expected

    # Two results are tolerated for this pair.
    ambiguous = min_edit_path(["your", "stuff"], ["you", "r", "stuff"])
    assert ambiguous in [(2, 'ISM'), (2, 'SIM')]
|
||||
|
||||
|
||||
def test_align():
    """Check ``align`` against three candidate/gold tokenization pairs.

    Each expected list has one entry per candidate token, holding either an
    integer or ``None``.
    """
    # (candidate tokens, gold tokens, expected alignment), checked in order.
    cases = [
        (
            ["U.S", ".", "policy"],
            ["U.S.", "policy"],
            [0, None, 1],
        ),
        (
            ["your", "stuff"],
            ["you", "r", "stuff"],
            [None, 2],
        ),
        (
            [u'i', u'like', u'2', u'guys', u' ', u'well', u'id', u'just',
             u'come', u'straight', u'out'],
            [u'i', u'like', u'2', u'guys', u'well', u'i', u'd', u'just',
             u'come', u'straight', u'out'],
            [0, 1, 2, 3, None, 4, None, 7, 8, 9, 10],
        ),
    ]
    for cand_tokens, gold_tokens, expected in cases:
        assert align(cand_tokens, gold_tokens) == expected
|
||||
|
Loading…
Reference in New Issue
Block a user