# coding: utf8
from __future__ import unicode_literals

import pytest


@pytest.mark.parametrize("word", ["don't", "don’t", "I'd", "I’d"])
def test_issue3521(en_tokenizer, word):
    """Regression test for issue #3521.

    The contraction suffixes "n't" (not) and "'d" (would) should be
    flagged as stopwords, for both the ASCII apostrophe and the Unicode
    right single quotation mark (U+2019) variants.
    """
    # The tokenizer splits the contraction off as a separate token at
    # index 1 (e.g. "don't" -> ["do", "n't"]).
    suffix_token = en_tokenizer(word)[1]
    assert suffix_token.is_stop