from spacy.util import detokenize def test_punct(): tokens = 'Pierre Vinken , 61 years old .'.split() detoks = [(0,), (1, 2), (3,), (4,), (5, 6)] token_rules = (',', '.') assert detokenize(token_rules, tokens) == detoks def test_contractions(): tokens = "I ca n't even".split() detoks = [(0,), (1, 2), (3,)] token_rules = ("can't",) assert detokenize(token_rules, tokens) == detoks def test_contractions_punct(): tokens = "I ca n't !".split() detoks = [(0,), (1, 2, 3)] token_rules = ("can't", '!') assert detokenize(token_rules, tokens) == detoks