mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
addressed all comments by Ines
This commit is contained in:
parent
85b4319f33
commit
4ff786e113
|
@ -69,7 +69,10 @@ yet you your yours yourself yourselves
|
||||||
""".split()
|
""".split()
|
||||||
)
|
)
|
||||||
|
|
||||||
for hyphen in ["'", "`", "‘", "´", "’"]:
|
contractions = ["n't", "'d", "'ll", "'m", "'re", "'s", "'ve"]
|
||||||
for stopword in u"n't 'd 'll 'm 're 's 've".split():
|
STOP_WORDS.update(contractions)
|
||||||
STOP_WORDS.add(stopword.replace("'", hyphen))
|
|
||||||
|
for apostrophe in ["‘", "’"]:
|
||||||
|
for stopword in contractions:
|
||||||
|
STOP_WORDS.add(stopword.replace("'", apostrophe))
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
|
|
|
@ -1,14 +1,16 @@
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"word",
|
"word",
|
||||||
[
|
[
|
||||||
u"don't",
|
"don't",
|
||||||
u"don’t",
|
"don’t",
|
||||||
u"I'd",
|
"I'd",
|
||||||
u"I’d",
|
"I’d",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_issue3521(en_tokenizer, word):
|
def test_issue3521(en_tokenizer, word):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user