remove cause without apostrophe from norm exceptions (#6636)

This commit is contained in:
Sofie Van Landeghem 2021-01-06 05:30:30 +01:00 committed by GitHub
parent 87562e470d
commit 6f7e7d88b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 2 deletions

View File

@@ -319,7 +319,6 @@ for exc_data in [
# Other contractions with leading apostrophe
for exc_data in [
-{ORTH: "cause", NORM: "because"},
{ORTH: "em", LEMMA: PRON_LEMMA, NORM: "them"},
{ORTH: "ll", LEMMA: "will", NORM: "will"},
{ORTH: "nuff", LEMMA: "enough", NORM: "enough"},

View File

@@ -111,7 +111,15 @@ def test_en_tokenizer_handles_times(en_tokenizer, text):
@pytest.mark.parametrize(
-"text,norms", [("I'm", ["i", "am"]), ("shan't", ["shall", "not"])]
+"text,norms",
+[
+("I'm", ["i", "am"]),
+("shan't", ["shall", "not"]),
+(
+"Many factors cause cancer 'cause it is complex",
+["many", "factors", "cause", "cancer", "because", "it", "is", "complex"],
+),
+],
)
def test_en_tokenizer_norm_exceptions(en_tokenizer, text, norms):
tokens = en_tokenizer(text)