mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-01 00:17:44 +03:00 
			
		
		
		
	* Use isort with Black profile
	* isort all the things
	* Fix import cycles as a result of import sorting
	* Add DOCBIN_ALL_ATTRS type definition
	* Add isort to requirements
	* Remove isort from build dependencies check
	* Typo
		
			
				
	
	
		
			75 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			75 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import pytest
 | |
| 
 | |
| from spacy.matcher import levenshtein
 | |
| from spacy.matcher.levenshtein import levenshtein_compare
 | |
| 
 | |
| 
 | |
| # empty string plus 10 random ASCII, 10 random unicode, and 2 random long tests
 | |
| # from polyleven
 | |
@pytest.mark.parametrize(
    ("dist", "a", "b"),
    [
        (0, "", ""),
        (4, "bbcb", "caba"),
        (3, "abcb", "cacc"),
        (3, "aa", "ccc"),
        (1, "cca", "ccac"),
        (1, "aba", "aa"),
        (4, "bcbb", "abac"),
        (3, "acbc", "bba"),
        (3, "cbba", "a"),
        (2, "bcc", "ba"),
        (4, "aaa", "ccbb"),
        (3, "うあい", "いいうい"),
        (2, "あううい", "うあい"),
        (3, "いういい", "うううあ"),
        (2, "うい", "あいあ"),
        (2, "いあい", "いう"),
        (1, "いい", "あいい"),
        (3, "あうあ", "いいああ"),
        (4, "いあうう", "ううああ"),
        (3, "いあいい", "ういああ"),
        (3, "いいああ", "ううあう"),
        (
            166,
            "TCTGGGCACGGATTCGTCAGATTCCATGTCCATATTTGAGGCTCTTGCAGGCAAAATTTGGGCATGTGAACTCCTTATAGTCCCCGTGC",
            "ATATGGATTGGGGGCATTCAAAGATACGGTTTCCCTTTCTTCAGTTTCGCGCGGCGCACGTCCGGGTGCGAGCCAGTTCGTCTTACTCACATTGTCGACTTCACGAATCGCGCATGATGTGCTTAGCCTGTACTTACGAACGAACTTTCGGTCCAAATACATTCTATCAACACCGAGGTATCCGTGCCACACGCCGAAGCTCGACCGTGTTCGTTGAGAGGTGGAAATGGTAAAAGATGAACATAGTC",
        ),
        (
            111,
            "GGTTCGGCCGAATTCATAGAGCGTGGTAGTCGACGGTATCCCGCCTGGTAGGGGCCCCTTCTACCTAGCGGAAGTTTGTCAGTACTCTATAACACGAGGGCCTCTCACACCCTAGATCGTCCAGCCACTCGAAGATCGCAGCACCCTTACAGAAAGGCATTAATGTTTCTCCTAGCACTTGTGCAATGGTGAAGGAGTGATG",
            "CGTAACACTTCGCGCTACTGGGCTGCAACGTCTTGGGCATACATGCAAGATTATCTAATGCAAGCTTGAGCCCCGCTTGCGGAATTTCCCTAATCGGGGTCCCTTCCTGTTACGATAAGGACGCGTGCACT",
        ),
    ],
)
def test_levenshtein(dist, a, b):
    """Check that ``levenshtein(a, b)`` equals the expected value ``dist``.

    Each parametrized row supplies the expected result first, followed by
    the two strings that are compared.
    """
    computed = levenshtein(a, b)
    assert computed == dist
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize(
    ("a", "b", "fuzzy", "expected"),
    [
        ("a", "a", 1, True),
        ("a", "a", 0, True),
        ("a", "a", -1, True),
        ("a", "ab", 1, True),
        ("a", "ab", 0, False),
        ("a", "ab", -1, True),
        ("ab", "ac", 1, True),
        ("ab", "ac", -1, True),
        ("abc", "cde", 4, True),
        ("abc", "cde", -1, False),
        ("abcdef", "cdefgh", 4, True),
        ("abcdef", "cdefgh", 3, False),
        ("abcdef", "cdefgh", -1, False),  # default (2 for length 6)
        ("abcdefgh", "cdefghijk", 5, True),
        ("abcdefgh", "cdefghijk", 4, False),
        ("abcdefgh", "cdefghijk", -1, False),  # default (2)
        ("abcdefgh", "cdefghijkl", 6, True),
        ("abcdefgh", "cdefghijkl", 5, False),
        ("abcdefgh", "cdefghijkl", -1, False),  # default (2)
    ],
)
def test_levenshtein_compare(a, b, fuzzy, expected):
    """Check the result of ``levenshtein_compare(a, b, fuzzy)``.

    Each parametrized row gives the two strings, the ``fuzzy`` value passed
    as the third argument, and the result the call must compare equal to.
    Rows with ``fuzzy == -1`` are the ones the inline comments mark as
    "default".
    """
    outcome = levenshtein_compare(a, b, fuzzy)
    assert outcome == expected
 |