mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
remove cause without apostrophe from norm exceptions (#6636)
This commit is contained in:
parent
87562e470d
commit
6f7e7d88b9
|
@ -319,7 +319,6 @@ for exc_data in [
|
||||||
# Other contractions with leading apostrophe
|
# Other contractions with leading apostrophe
|
||||||
|
|
||||||
for exc_data in [
|
for exc_data in [
|
||||||
{ORTH: "cause", NORM: "because"},
|
|
||||||
{ORTH: "em", LEMMA: PRON_LEMMA, NORM: "them"},
|
{ORTH: "em", LEMMA: PRON_LEMMA, NORM: "them"},
|
||||||
{ORTH: "ll", LEMMA: "will", NORM: "will"},
|
{ORTH: "ll", LEMMA: "will", NORM: "will"},
|
||||||
{ORTH: "nuff", LEMMA: "enough", NORM: "enough"},
|
{ORTH: "nuff", LEMMA: "enough", NORM: "enough"},
|
||||||
|
|
|
@ -111,7 +111,15 @@ def test_en_tokenizer_handles_times(en_tokenizer, text):
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"text,norms", [("I'm", ["i", "am"]), ("shan't", ["shall", "not"])]
|
"text,norms",
|
||||||
|
[
|
||||||
|
("I'm", ["i", "am"]),
|
||||||
|
("shan't", ["shall", "not"]),
|
||||||
|
(
|
||||||
|
"Many factors cause cancer 'cause it is complex",
|
||||||
|
["many", "factors", "cause", "cancer", "because", "it", "is", "complex"],
|
||||||
|
),
|
||||||
|
],
|
||||||
)
|
)
|
||||||
def test_en_tokenizer_norm_exceptions(en_tokenizer, text, norms):
|
def test_en_tokenizer_norm_exceptions(en_tokenizer, text, norms):
|
||||||
tokens = en_tokenizer(text)
|
tokens = en_tokenizer(text)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user