spaCy/tests/regression/test_issue3521.py

# coding: utf8
from __future__ import unicode_literals

import pytest


@pytest.mark.parametrize("word", ["don't", "dont", "I'd", "Id"])
def test_issue3521(en_tokenizer, word):
    # token 1 is the abbreviated form of "not" / "would" (e.g. "n't", "'d")
    tok = en_tokenizer(word)[1]
    # 'not' and 'would' should be stopwords, also in their abbreviated forms
    assert tok.is_stop
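

# --- Illustrative standalone sketch (not part of the original test) ---
# A minimal, hedged reproduction of the same check without pytest's
# en_tokenizer fixture. It assumes spacy.blank("en") provides the same
# English tokenizer the fixture wraps; run this file directly to try it.
if __name__ == "__main__":
    import spacy

    nlp = spacy.blank("en")
    for word in ["don't", "dont", "I'd", "Id"]:
        doc = nlp(word)
        # token 1 is the abbreviated "not" / "would" form
        print(word, [t.text for t in doc], doc[1].is_stop)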