spaCy/spacy/tests/gold/test_alignment.py
2016-04-28 14:31:41 +02:00

16 lines
601 B
Python

from spacy.gold import _min_edit_path
def test_min_edit_path():
    """Regression test: the edit path must start with 'M' in both directions.

    The failure was first seen while parsing Chinese text: the alignment
    did not match at the very first token, and whether it did depended on
    the direction in which the two token sequences were compared. The root
    cause was a regular expression compiled without the ``re.UNICODE``
    flag, which made it over-match.
    """
    # Candidate tokenization splits "D-RAM" into three pieces; the gold
    # tokenization keeps it whole. Both begin with the same Chinese token.
    tokenized = [u'\u53cc\u65b9', u'D', u'-', u'RAM']
    reference = [u'\u53cc\u65b9', u'D-RAM']

    # Check candidate -> gold first, then gold -> candidate. Only the
    # alignment sequence matters here; the returned cost is ignored.
    for source, target in ((tokenized, reference), (reference, tokenized)):
        _, operations = _min_edit_path(source, target)
        assert operations[0] == 'M'