mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Remove "cause" (without leading apostrophe) from norm exceptions (#6636)
This commit is contained in:
parent
87562e470d
commit
6f7e7d88b9
|
@ -319,7 +319,6 @@ for exc_data in [
|
|||
# Other contractions with leading apostrophe
|
||||
|
||||
for exc_data in [
|
||||
{ORTH: "cause", NORM: "because"},
|
||||
{ORTH: "em", LEMMA: PRON_LEMMA, NORM: "them"},
|
||||
{ORTH: "ll", LEMMA: "will", NORM: "will"},
|
||||
{ORTH: "nuff", LEMMA: "enough", NORM: "enough"},
|
||||
|
|
|
@ -111,7 +111,15 @@ def test_en_tokenizer_handles_times(en_tokenizer, text):
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"text,norms", [("I'm", ["i", "am"]), ("shan't", ["shall", "not"])]
|
||||
"text,norms",
|
||||
[
|
||||
("I'm", ["i", "am"]),
|
||||
("shan't", ["shall", "not"]),
|
||||
(
|
||||
"Many factors cause cancer 'cause it is complex",
|
||||
["many", "factors", "cause", "cancer", "because", "it", "is", "complex"],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_en_tokenizer_norm_exceptions(en_tokenizer, text, norms):
|
||||
tokens = en_tokenizer(text)
|
||||
|
|
Loading…
Reference in New Issue
Block a user