spaCy/tests/regression/test_issue3521.py

# coding: utf8
from __future__ import unicode_literals

import pytest


@pytest.mark.parametrize("word", ["don't", "dont", "I'd", "Id"])
def test_issue3521(en_tokenizer, word):
    # token 1 is the abbreviated form of "not" / "would" (e.g. "n't", "'d")
    tok = en_tokenizer(word)[1]
    # 'not' and 'would' should be stopwords, also in their abbreviated forms
    assert tok.is_stop
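

# --- Illustrative standalone sketch (not part of the original test) ---
# A minimal, hedged reproduction of the same check without pytest's
# en_tokenizer fixture. It assumes spacy.blank("en") provides the same
# English tokenizer the fixture wraps; run this file directly to try it.
if __name__ == "__main__":
    import spacy

    nlp = spacy.blank("en")
    for word in ["don't", "dont", "I'd", "Id"]:
        doc = nlp(word)
        # token 1 is the abbreviated "not" / "would" form
        print(word, [t.text for t in doc], doc[1].is_stop)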