From 11bffaa1ab71f41684a368673e64a84c708d5f2c Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 28 Apr 2016 14:31:41 +0200
Subject: [PATCH] * Add test for regex locale in gold standard

---
 spacy/tests/gold/test_alignment.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 spacy/tests/gold/test_alignment.py

diff --git a/spacy/tests/gold/test_alignment.py b/spacy/tests/gold/test_alignment.py
new file mode 100644
index 000000000..ddb578f36
--- /dev/null
+++ b/spacy/tests/gold/test_alignment.py
@@ -0,0 +1,15 @@
+from spacy.gold import _min_edit_path
+
+
+def test_min_edit_path():
+    '''Test a problem that arose from Chinese parsing, where the alignment did
+    not match at the start, depending on which direction was followed. The cause
+    was a regular expression lacking the re.UNICODE flag, so it over-matched.
+    '''
+
+    cand_words = [u'\u53cc\u65b9', u'D', u'-', u'RAM']  # 'D-RAM' split in three
+    gold_words = [u'\u53cc\u65b9', u'D-RAM']  # 'D-RAM' kept as a single token
+    cost, alignment = _min_edit_path(cand_words, gold_words)  # candidate first
+    assert alignment[0] == 'M'  # NOTE(review): 'M' presumably = match; confirm
+    cost, alignment = _min_edit_path(gold_words, cand_words)  # arguments swapped
+    assert alignment[0] == 'M'  # the shared first token must align both ways