Decorate regression tests
Even if the issue number is already in the file, I still decorated the tests, just to follow the convention found in test_issue8168.py
parent 91dec2c76e
commit addeb34bc4
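For reference, the convention from test_issue8168.py that this commit follows is to tag every regression test with the number of the GitHub issue it covers, using a custom "issue" marker. A minimal sketch of the pattern (hypothetical example, not taken from this diff; it assumes the "issue" marker is registered in the project's pytest configuration):

import pytest


@pytest.mark.issue(8168)  # GitHub issue number the regression test covers
def test_issue8168():
    ...  # the actual test body lives in the regression test file

Tests tagged this way can then be collected together, for example with "pytest -m issue".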
@@ -12,6 +12,7 @@ from spacy.tokens import Doc, Span
 from ..util import make_tempdir
 
 
+@pytest.mark.issue(118)
 @pytest.mark.parametrize(
     "patterns",
     [
@@ -39,6 +40,7 @@ def test_issue118(en_tokenizer, patterns):
     assert ents[0].end == 11
 
 
+@pytest.mark.issue(118)
 @pytest.mark.parametrize(
     "patterns",
     [
@@ -66,6 +68,7 @@ def test_issue118_prefix_reorder(en_tokenizer, patterns):
     assert ents[0].end == 11
 
 
+@pytest.mark.issue(242)
 def test_issue242(en_tokenizer):
     """Test overlapping multi-word phrases."""
     text = "There are different food safety standards in different countries."
@@ -88,6 +91,7 @@ def test_issue242(en_tokenizer):
         doc.ents += tuple(matches)
 
 
+@pytest.mark.issue(309)
 def test_issue309(en_vocab):
     """Test Issue #309: SBD fails on empty string"""
     doc = Doc(en_vocab, words=[" "], heads=[0], deps=["ROOT"])
@@ -96,6 +100,7 @@ def test_issue309(en_vocab):
     assert len(sents) == 1
 
 
+@pytest.mark.issue(351)
 def test_issue351(en_tokenizer):
     doc = en_tokenizer("   This is a cat.")
     assert doc[0].idx == 0
@@ -103,12 +108,14 @@ def test_issue351(en_tokenizer):
     assert doc[1].idx == 3
 
 
+@pytest.mark.issue(360)
 def test_issue360(en_tokenizer):
     """Test tokenization of big ellipsis"""
     tokens = en_tokenizer("$45...............Asking")
     assert len(tokens) > 2
 
 
+@pytest.mark.issue(361)
 @pytest.mark.parametrize("text1,text2", [("cat", "dog")])
 def test_issue361(en_vocab, text1, text2):
     """Test Issue #361: Equality of lexemes"""
@@ -116,6 +123,7 @@ def test_issue361(en_vocab, text1, text2):
     assert en_vocab[text1] != en_vocab[text2]
 
 
+@pytest.mark.issue(587)
 def test_issue587(en_tokenizer):
     """Test that Matcher doesn't segfault on particular input"""
     doc = en_tokenizer("a b; c")
@@ -131,12 +139,14 @@ def test_issue587(en_tokenizer):
     assert len(matches) == 2
 
 
+@pytest.mark.issue(588)
 def test_issue588(en_vocab):
     matcher = Matcher(en_vocab)
     with pytest.raises(ValueError):
         matcher.add("TEST", [[]])
 
 
+@pytest.mark.issue(590)
 def test_issue590(en_vocab):
     """Test overlapping matches"""
     doc = Doc(en_vocab, words=["n", "=", "1", ";", "a", ":", "5", "%"])
@@ -149,6 +159,7 @@ def test_issue590(en_vocab):
     assert len(matches) == 2
 
 
+@pytest.mark.issue(595)
 @pytest.mark.skip(reason="Old vocab-based lemmatization")
 def test_issue595():
     """Test lemmatization of base forms"""
@@ -164,6 +175,7 @@ def test_issue595():
     assert doc[2].lemma_ == "feed"
 
 
+@pytest.mark.issue(599)
 def test_issue599(en_vocab):
     doc = Doc(en_vocab)
     doc2 = Doc(doc.vocab)
@@ -171,12 +183,14 @@ def test_issue599(en_vocab):
     assert doc2.has_annotation("DEP")
 
 
+@pytest.mark.issue(600)
 def test_issue600():
     vocab = Vocab(tag_map={"NN": {"pos": "NOUN"}})
     doc = Doc(vocab, words=["hello"])
     doc[0].tag_ = "NN"
 
 
+@pytest.mark.issue(615)
 def test_issue615(en_tokenizer):
     def merge_phrases(matcher, doc, i, matches):
         """Merge a phrase. We have to be careful here because we'll change the
@@ -204,6 +218,7 @@ def test_issue615(en_tokenizer):
     assert entities[0].label != 0
 
 
+@pytest.mark.issue(736)
 @pytest.mark.parametrize("text,number", [("7am", "7"), ("11p.m.", "11")])
 def test_issue736(en_tokenizer, text, number):
     """Test that times like "7am" are tokenized correctly and that numbers are
@@ -213,6 +228,7 @@ def test_issue736(en_tokenizer, text, number):
     assert tokens[0].text == number
 
 
+@pytest.mark.issue(740)
 @pytest.mark.parametrize("text", ["3/4/2012", "01/12/1900"])
 def test_issue740(en_tokenizer, text):
     """Test that dates are not split and kept as one token. This behaviour is
@@ -222,6 +238,7 @@ def test_issue740(en_tokenizer, text):
     assert len(tokens) == 1
 
 
+@pytest.mark.issue(743)
 def test_issue743():
     doc = Doc(Vocab(), ["hello", "world"])
     token = doc[0]
@@ -230,6 +247,7 @@ def test_issue743():
     assert items[0] is token
 
 
+@pytest.mark.issue(744)
 @pytest.mark.parametrize("text", ["We were scared", "We Were Scared"])
 def test_issue744(en_tokenizer, text):
     """Test that 'were' and 'Were' are excluded from the contractions
@@ -239,6 +257,7 @@ def test_issue744(en_tokenizer, text):
     assert tokens[1].text.lower() == "were"
 
 
+@pytest.mark.issue(759)
 @pytest.mark.parametrize(
     "text,is_num", [("one", True), ("ten", True), ("teneleven", False)]
 )
@@ -247,6 +266,7 @@ def test_issue759(en_tokenizer, text, is_num):
     assert tokens[0].like_num == is_num
 
 
+@pytest.mark.issue(775)
 @pytest.mark.parametrize("text", ["Shell", "shell", "Shed", "shed"])
 def test_issue775(en_tokenizer, text):
     """Test that 'Shell' and 'shell' are excluded from the contractions
@@ -256,6 +276,7 @@ def test_issue775(en_tokenizer, text):
     assert tokens[0].text == text
 
 
+@pytest.mark.issue(792)
 @pytest.mark.parametrize("text", ["This is a string ", "This is a string\u0020"])
 def test_issue792(en_tokenizer, text):
     """Test for Issue #792: Trailing whitespace is removed after tokenization."""
@@ -263,6 +284,7 @@ def test_issue792(en_tokenizer, text):
     assert "".join([token.text_with_ws for token in doc]) == text
 
 
+@pytest.mark.issue(792)
 @pytest.mark.parametrize("text", ["This is a string", "This is a string\n"])
 def test_control_issue792(en_tokenizer, text):
     """Test base case for Issue #792: Non-trailing whitespace"""
@@ -270,6 +292,7 @@ def test_control_issue792(en_tokenizer, text):
     assert "".join([token.text_with_ws for token in doc]) == text
 
 
+@pytest.mark.issue(801)
 @pytest.mark.skip(
     reason="Can not be fixed unless with variable-width lookbehinds, cf. PR #3218"
 )
@@ -292,6 +315,7 @@ def test_issue801(en_tokenizer, text, tokens):
     assert [t.text for t in doc] == tokens
 
 
+@pytest.mark.issue(805)
 @pytest.mark.parametrize(
     "text,expected_tokens",
     [
@@ -311,6 +335,7 @@ def test_issue805(sv_tokenizer, text, expected_tokens):
     assert expected_tokens == token_list
 
 
+@pytest.mark.issue(850)
 def test_issue850():
     """The variable-length pattern matches the succeeding token. Check we
     handle the ambiguity correctly."""
@@ -326,6 +351,7 @@ def test_issue850():
     assert end == 4
 
 
+@pytest.mark.issue(850)
 def test_issue850_basic():
     """Test Matcher matches with '*' operator and Boolean flag"""
     vocab = Vocab(lex_attr_getters={LOWER: lambda string: string.lower()})
@@ -340,6 +366,7 @@ def test_issue850_basic():
     assert end == 4
 
 
+@pytest.mark.issue(852)
 @pytest.mark.skip(
     reason="French exception list is not enabled in the default tokenizer anymore"
 )
@@ -352,6 +379,7 @@ def test_issue852(fr_tokenizer, text):
     assert len(tokens) == 1
 
 
+@pytest.mark.issue(859)
 @pytest.mark.parametrize(
     "text", ["aaabbb@ccc.com\nThank you!", "aaabbb@ccc.com \nThank you!"]
 )
@@ -361,6 +389,7 @@ def test_issue859(en_tokenizer, text):
     assert doc.text == text
 
 
+@pytest.mark.issue(886)
 @pytest.mark.parametrize("text", ["Datum:2014-06-02\nDokument:76467"])
 def test_issue886(en_tokenizer, text):
     """Test that token.idx matches the original text index for texts with newlines."""
@@ -370,6 +399,7 @@ def test_issue886(en_tokenizer, text):
         assert text[token.idx] == token.text[0]
 
 
+@pytest.mark.issue(891)
 @pytest.mark.parametrize("text", ["want/need"])
 def test_issue891(en_tokenizer, text):
     """Test that / infixes are split correctly."""
@@ -378,6 +408,7 @@ def test_issue891(en_tokenizer, text):
     assert tokens[1].text == "/"
 
 
+@pytest.mark.issue(912)
 @pytest.mark.skip(reason="Old vocab-based lemmatization")
 @pytest.mark.parametrize(
     "text,tag,lemma",
@@ -390,6 +421,7 @@ def test_issue912(en_vocab, text, tag, lemma):
     assert doc[0].lemma_ == lemma
 
 
+@pytest.mark.issue(957)
 @pytest.mark.slow
 def test_issue957(en_tokenizer):
     """Test that spaCy doesn't hang on many punctuation characters.
@@ -405,6 +437,7 @@ def test_issue957(en_tokenizer):
         assert doc
 
 
+@pytest.mark.issue(999)
 def test_issue999():
     """Test that adding entities and resuming training works passably OK.
     There are two issues here:
@@ -9,6 +9,7 @@ from spacy.tokenizer import Tokenizer
 from spacy.symbols import ORTH, LEMMA, POS
 
 
+@pytest.mark.issue(1061)
 def test_issue1061():
     """Test special-case works after tokenizing. Was caching problem."""
     text = "I like _MATH_ even _MATH_ when _MATH_, except when _MATH_ is _MATH_! but not _MATH_."
@@ -33,6 +34,7 @@ def test_issue1061():
 @pytest.mark.skip(
     reason="Can not be fixed without variable-width look-behind (which we don't want)"
 )
+@pytest.mark.issue(1235)
 def test_issue1235():
     """Test that g is not split of if preceded by a number and a letter"""
     nlp = English()
@@ -46,6 +48,7 @@ def test_issue1235():
     assert doc[4].text == "g"
 
 
+@pytest.mark.issue(1242)
 def test_issue1242():
     nlp = English()
     doc = nlp("")
@@ -56,6 +59,7 @@ def test_issue1242():
 
 
 @pytest.mark.skip(reason="v3 no longer supports LEMMA/POS in tokenizer special cases")
+@pytest.mark.issue(1250)
 def test_issue1250():
     """Test cached special cases."""
     special_case = [{ORTH: "reimbur", LEMMA: "reimburse", POS: "VERB"}]
@@ -67,6 +71,7 @@ def test_issue1250():
     assert lemmas == ["reimburse", ",", "reimburse", "..."]
 
 
+@pytest.mark.issue(1257)
 def test_issue1257():
     """Test that tokens compare correctly."""
     doc1 = Doc(Vocab(), words=["a", "b", "c"])
@@ -75,6 +80,7 @@ def test_issue1257():
     assert not doc1[0] == doc2[0]
 
 
+@pytest.mark.issue(1375)
 def test_issue1375():
     """Test that token.nbor() raises IndexError for out-of-bounds access."""
     doc = Doc(Vocab(), words=["0", "1", "2"])
@@ -86,6 +92,7 @@ def test_issue1375():
     assert doc[1].nbor(1).text == "2"
 
 
+@pytest.mark.issue(1434)
 def test_issue1434():
     """Test matches occur when optional element at end of short doc."""
     pattern = [{"ORTH": "Hello"}, {"IS_ALPHA": True, "OP": "?"}]
@@ -111,6 +118,7 @@ def test_issue1434():
         ("a b b", 0, 3),
     ],
 )
+@pytest.mark.issue(1450)
 def test_issue1450(string, start, end):
     """Test matcher works when patterns end with * operator."""
     pattern = [{"ORTH": "a"}, {"ORTH": "b", "OP": "*"}]
@@ -124,6 +132,7 @@ def test_issue1450(string, start, end):
     assert matches[-1][2] == end
 
 
+@pytest.mark.issue(1488)
 def test_issue1488():
     prefix_re = re.compile(r"""[\[\("']""")
     suffix_re = re.compile(r"""[\]\)"']""")
@@ -147,6 +156,7 @@ def test_issue1488():
         assert token.text
 
 
+@pytest.mark.issue(1494)
 def test_issue1494():
     infix_re = re.compile(r"""[^a-z]""")
     test_cases = [
@@ -17,6 +17,7 @@ from spacy.matcher import Matcher
 from ..util import make_tempdir
 
 
+@pytest.mark.issue(1506)
 def test_issue1506():
     def string_generator():
         for _ in range(10001):
@@ -40,6 +41,7 @@ def test_issue1506():
             str(t.lemma_)
 
 
+@pytest.mark.issue(1518)
 def test_issue1518():
     """Test vectors.resize() works."""
     vectors = Vectors(shape=(10, 10))
@@ -47,6 +49,7 @@ def test_issue1518():
     vectors.resize((5, 9))
 
 
+@pytest.mark.issue(1537)
 def test_issue1537():
     """Test that Span.as_doc() doesn't segfault."""
     string = "The sky is blue . The man is pink . The dog is purple ."
@@ -65,6 +68,7 @@ def test_issue1537():
 
 
 # TODO: Currently segfaulting, due to l_edge and r_edge misalignment
+@pytest.mark.issue(1537)
 # def test_issue1537_model():
 #     nlp = load_spacy('en')
 #     doc = nlp('The sky is blue. The man is pink. The dog is purple.')
@@ -73,12 +77,14 @@ def test_issue1537():
 #     print(list(sents[1].noun_chunks))
 
 
+@pytest.mark.issue(1539)
 def test_issue1539():
     """Ensure vectors.resize() doesn't try to modify dictionary during iteration."""
     v = Vectors(shape=(10, 10), keys=[5, 3, 98, 100])
     v.resize((100, 100))
 
 
+@pytest.mark.issue(1547)
 def test_issue1547():
     """Test that entity labels still match after merging tokens."""
     words = ["\n", "worda", ".", "\n", "wordb", "-", "Biosphere", "2", "-", " \n"]
@@ -89,12 +95,14 @@ def test_issue1547():
     assert [ent.text for ent in doc.ents]
 
 
+@pytest.mark.issue(1612)
 def test_issue1612(en_tokenizer):
     doc = en_tokenizer("The black cat purrs.")
     span = doc[1:3]
     assert span.orth_ == span.text
 
 
+@pytest.mark.issue(1654)
 def test_issue1654():
     nlp = Language(Vocab())
     assert not nlp.pipeline
@@ -116,12 +124,14 @@ def test_issue1654():
 
 
 @pytest.mark.parametrize("text", ["test@example.com", "john.doe@example.co.uk"])
+@pytest.mark.issue(1698)
 def test_issue1698(en_tokenizer, text):
     doc = en_tokenizer(text)
     assert len(doc) == 1
     assert not doc[0].like_url
 
 
+@pytest.mark.issue(1727)
 def test_issue1727():
     """Test that models with no pretrained vectors can be deserialized
     correctly after vectors are added."""
@@ -138,6 +148,7 @@ def test_issue1727():
     assert tagger.cfg.get("pretrained_dims", 0) == 0
 
 
+@pytest.mark.issue(1757)
 def test_issue1757():
     """Test comparison against None doesn't cause segfault."""
     doc = Doc(Vocab(), words=["a", "b", "c"])
@@ -151,12 +162,14 @@ def test_issue1757():
     assert not doc.vocab["a"] < None
 
 
+@pytest.mark.issue(1758)
 def test_issue1758(en_tokenizer):
     """Test that "would've" is handled by the English tokenizer exceptions."""
     tokens = en_tokenizer("would've")
     assert len(tokens) == 2
 
 
+@pytest.mark.issue(1773)
 def test_issue1773(en_tokenizer):
     """Test that spaces don't receive a POS but no TAG. This is the root cause
     of the serialization issue reported in #1773."""
@@ -165,6 +178,7 @@ def test_issue1773(en_tokenizer):
     assert doc[0].tag_ != ""
 
 
+@pytest.mark.issue(1799)
 def test_issue1799():
     """Test sentence boundaries are deserialized correctly, even for
     non-projective sentences."""
@@ -186,6 +200,7 @@ def test_issue1799():
     assert len(list(doc.sents)) == 1
 
 
+@pytest.mark.issue(1807)
 def test_issue1807():
     """Test vocab.set_vector also adds the word to the vocab."""
     vocab = Vocab(vectors_name="test_issue1807")
@@ -194,6 +209,7 @@ def test_issue1807():
     assert "hello" in vocab
 
 
+@pytest.mark.issue(1834)
 def test_issue1834():
     """Test that sentence boundaries & parse/tag flags are not lost
     during serialization."""
@@ -217,6 +233,7 @@ def test_issue1834():
     assert new_doc.has_annotation("TAG")
 
 
+@pytest.mark.issue(1868)
 def test_issue1868():
     """Test Vocab.__contains__ works with int keys."""
     vocab = Vocab()
@@ -228,6 +245,7 @@ def test_issue1868():
     assert int_id not in vocab
 
 
+@pytest.mark.issue(1883)
 def test_issue1883():
     matcher = Matcher(Vocab())
     matcher.add("pat1", [[{"orth": "hello"}]])
@@ -239,11 +257,13 @@ def test_issue1883():
 
 
 @pytest.mark.parametrize("word", ["the"])
+@pytest.mark.issue(1889)
 def test_issue1889(word):
     assert is_stop(word, STOP_WORDS) == is_stop(word.upper(), STOP_WORDS)
 
 
 @pytest.mark.skip(reason="obsolete with the config refactor of v.3")
+@pytest.mark.issue(1915)
 def test_issue1915():
     cfg = {"hidden_depth": 2}  # should error out
     nlp = Language()
@@ -253,6 +273,7 @@ def test_issue1915():
         nlp.initialize(**cfg)
 
 
+@pytest.mark.issue(1945)
 def test_issue1945():
     """Test regression in Matcher introduced in v2.0.6."""
     matcher = Matcher(Vocab())
@@ -264,6 +285,7 @@ def test_issue1945():
     assert matches[1][1:] == (1, 3)
 
 
+@pytest.mark.issue(1963)
 def test_issue1963(en_tokenizer):
     """Test that doc.merge() resizes doc.tensor"""
     doc = en_tokenizer("a b c d")
@@ -275,6 +297,7 @@ def test_issue1963(en_tokenizer):
 
 
 @pytest.mark.parametrize("label", ["U-JOB-NAME"])
+@pytest.mark.issue(1967)
 def test_issue1967(label):
     nlp = Language()
     config = {}
@@ -293,6 +316,7 @@ def test_issue1967(label):
     assert "JOB-NAME" in ner.moves.get_actions(examples=[example])[1]
 
 
+@pytest.mark.issue(1971)
 def test_issue1971(en_vocab):
     # Possibly related to #2675 and #2671?
     matcher = Matcher(en_vocab)
@@ -13,6 +13,7 @@ from ..util import add_vecs_to_vocab
 @pytest.mark.skip(
     reason="Can not be fixed without iterative looping between prefix/suffix and infix"
 )
+@pytest.mark.issue(2070)
 def test_issue2070():
     """Test that checks that a dot followed by a quote is handled
     appropriately.
@@ -25,6 +26,7 @@ def test_issue2070():
     assert len(doc) == 11
 
 
+@pytest.mark.issue(2179)
 def test_issue2179():
     """Test that spurious 'extra_labels' aren't created when initializing NER."""
     nlp = Italian()
@@ -41,6 +43,7 @@ def test_issue2179():
     assert nlp2.get_pipe("ner").labels == ("CITIZENSHIP",)
 
 
+@pytest.mark.issue(2203)
 def test_issue2203(en_vocab):
     """Test that lemmas are set correctly in doc.from_array."""
     words = ["I", "'ll", "survive"]
@@ -61,6 +64,7 @@ def test_issue2203(en_vocab):
     assert [t.lemma_ for t in new_doc] == lemmas
 
 
+@pytest.mark.issue(2219)
 def test_issue2219(en_vocab):
     vectors = [("a", [1, 2, 3]), ("letter", [4, 5, 6])]
     add_vecs_to_vocab(en_vocab, vectors)
@@ -69,6 +73,7 @@ def test_issue2219(en_vocab):
     assert doc[0].similarity(doc[1]) == doc[1].similarity(doc[0])
 
 
+@pytest.mark.issue(2361)
 def test_issue2361(de_vocab):
     chars = ("&lt;", "&gt;", "&amp;", "&quot;")
     words = ["<", ">", "&", '"']
@@ -78,6 +83,7 @@ def test_issue2361(de_vocab):
         assert char in html
 
 
+@pytest.mark.issue(2385)
 def test_issue2385():
     """Test that IOB tags are correctly converted to BILUO tags."""
     # fix bug in labels with a 'b' character
@@ -99,11 +105,13 @@ def test_issue2385():
         ("U-BRAWLER", "U-BRAWLER"),
     ],
 )
+@pytest.mark.issue(2385)
 def test_issue2385_biluo(tags):
     """Test that BILUO-compatible tags aren't modified."""
     assert iob_to_biluo(tags) == list(tags)
 
 
+@pytest.mark.issue(2396)
 def test_issue2396(en_vocab):
     words = ["She", "created", "a", "test", "for", "spacy"]
     heads = [1, 1, 3, 1, 3, 4]
@@ -125,6 +133,7 @@ def test_issue2396(en_vocab):
     assert (span.get_lca_matrix() == matrix).all()
 
 
+@pytest.mark.issue(2464)
 def test_issue2464(en_vocab):
     """Test problem with successive ?. This is the same bug, so putting it here."""
     matcher = Matcher(en_vocab)
@@ -134,6 +143,7 @@ def test_issue2464(en_vocab):
     assert len(matches) == 3
 
 
+@pytest.mark.issue(2482)
 def test_issue2482():
     """Test we can serialize and deserialize a blank NER or parser model."""
     nlp = Italian()
@@ -13,6 +13,7 @@ import numpy
 import random
 
 
+@pytest.mark.issue(2564)
 def test_issue2564():
     """Test the tagger sets has_annotation("TAG") correctly when used via Language.pipe."""
     nlp = Language()
@@ -26,6 +27,7 @@ def test_issue2564():
     assert piped_doc.has_annotation("TAG")
 
 
+@pytest.mark.issue(2569)
 def test_issue2569(en_tokenizer):
     """Test that operator + is greedy."""
     doc = en_tokenizer("It is May 15, 1993.")
@@ -46,12 +48,14 @@ def test_issue2569(en_tokenizer):
         "oow.jspsearch.eventoracleopenworldsearch.technologyoraclesolarissearch.technologystoragesearch.technologylinuxsearch.technologyserverssearch.technologyvirtualizationsearch.technologyengineeredsystemspcodewwmkmppscem:",
     ],
 )
+@pytest.mark.issue(2626)
 def test_issue2626_2835(en_tokenizer, text):
     """Check that sentence doesn't cause an infinite loop in the tokenizer."""
     doc = en_tokenizer(text)
     assert doc
 
 
+@pytest.mark.issue(2656)
 def test_issue2656(en_tokenizer):
     """Test that tokenizer correctly splits off punctuation after numbers with
     decimal points.
@@ -71,6 +75,7 @@ def test_issue2656(en_tokenizer):
     assert doc[10].text == "."
 
 
+@pytest.mark.issue(2671)
 def test_issue2671():
     """Ensure the correct entity ID is returned for matches with quantifiers.
     See also #2675
@@ -94,6 +99,7 @@ def test_issue2671():
     assert nlp.vocab.strings[match_id] == pattern_id
 
 
+@pytest.mark.issue(2728)
 def test_issue2728(en_vocab):
     """Test that displaCy ENT visualizer escapes HTML correctly."""
     doc = Doc(en_vocab, words=["test", "<RELEASE>", "test"])
@@ -105,6 +111,7 @@ def test_issue2728(en_vocab):
     assert "&lt;RELEASE&gt;" in html
 
 
+@pytest.mark.issue(2754)
 def test_issue2754(en_tokenizer):
     """Test that words like 'a' and 'a.m.' don't get exceptional norm values."""
     a = en_tokenizer("a")
@@ -113,6 +120,7 @@ def test_issue2754(en_tokenizer):
     assert am[0].norm_ == "am"
 
 
+@pytest.mark.issue(2772)
 def test_issue2772(en_vocab):
     """Test that deprojectivization doesn't mess up sentence boundaries."""
     # fmt: off
@@ -128,6 +136,7 @@ def test_issue2772(en_vocab):
 
 @pytest.mark.parametrize("text", ["-0.23", "+123,456", "±1"])
 @pytest.mark.parametrize("lang_cls", [English, MultiLanguage])
+@pytest.mark.issue(2782)
 def test_issue2782(text, lang_cls):
     """Check that like_num handles + and - before number."""
     nlp = lang_cls()
@@ -136,6 +145,7 @@ def test_issue2782(text, lang_cls):
     assert doc[0].like_num
 
 
+@pytest.mark.issue(2800)
 def test_issue2800():
     """Test issue that arises when too many labels are added to NER model.
     Used to cause segfault.
@@ -157,6 +167,7 @@ def test_issue2800():
             nlp.update([example], sgd=optimizer, losses=losses, drop=0.5)
 
 
+@pytest.mark.issue(2822)
 def test_issue2822(it_tokenizer):
     """Test that the abbreviation of poco is kept as one word."""
     doc = it_tokenizer("Vuoi un po' di zucchero?")
@@ -169,6 +180,7 @@ def test_issue2822(it_tokenizer):
     assert doc[5].text == "?"
 
 
+@pytest.mark.issue(2833)
 def test_issue2833(en_vocab):
     """Test that a custom error is raised if a token or span is pickled."""
     doc = Doc(en_vocab, words=["Hello", "world"])
@@ -178,6 +190,7 @@ def test_issue2833(en_vocab):
         pickle.dumps(doc[0:2])
 
 
+@pytest.mark.issue(2871)
 def test_issue2871():
     """Test that vectors recover the correct key for spaCy reserved words."""
     words = ["dog", "cat", "SUFFIX"]
@@ -196,6 +209,7 @@ def test_issue2871():
     assert vocab.vectors.find(key="SUFFIX") == 2
 
 
+@pytest.mark.issue(2901)
 def test_issue2901():
     """Test that `nlp` doesn't fail."""
     try:
@@ -207,6 +221,7 @@ def test_issue2901():
     assert doc
 
 
+@pytest.mark.issue(2926)
 def test_issue2926(fr_tokenizer):
     """Test that the tokenizer correctly splits tokens separated by a slash (/)
     ending in a digit.
@@ -14,6 +14,7 @@ from spacy.vectors import Vectors
 import numpy
 
 
+@pytest.mark.issue(3002)
 def test_issue3002():
     """Test that the tokenizer doesn't hang on a long list of dots"""
     nlp = German()
@@ -23,6 +24,7 @@ def test_issue3002():
     assert len(doc) == 5
 
 
+@pytest.mark.issue(3009)
 def test_issue3009(en_vocab):
     """Test problem with matcher quantifiers"""
     patterns = [
@@ -53,6 +55,7 @@ def test_issue3009(en_vocab):
         assert matches
 
 
+@pytest.mark.issue(3012)
 def test_issue3012(en_vocab):
     """Test that the is_tagged attribute doesn't get overwritten when we from_array
     without tag information."""
@@ -74,6 +77,7 @@ def test_issue3012(en_vocab):
     assert (doc2[2].text, doc2[2].pos_, doc2[2].tag_, doc2[2].ent_type_) == expected
 
 
+@pytest.mark.issue(3199)
 def test_issue3199():
     """Test that Span.noun_chunks works correctly if no noun chunks iterator
     is available. To make this test future-proof, we're constructing a Doc
@@ -85,6 +89,7 @@ def test_issue3199():
         list(doc[0:3].noun_chunks)
 
 
+@pytest.mark.issue(3209)
 def test_issue3209():
     """Test issue that occurred in spaCy nightly where NER labels were being
     mapped to classes incorrectly after loading the model, when the labels
@@ -104,6 +109,7 @@ def test_issue3209():
     assert ner2.move_names == move_names
 
 
+@pytest.mark.issue(3248)
 def test_issue3248_1():
     """Test that the PhraseMatcher correctly reports its number of rules, not
     total number of patterns."""
@@ -114,6 +120,7 @@ def test_issue3248_1():
     assert len(matcher) == 2
 
 
+@pytest.mark.issue(3248)
 def test_issue3248_2():
     """Test that the PhraseMatcher can be pickled correctly."""
     nlp = English()
@@ -125,6 +132,7 @@ def test_issue3248_2():
     assert len(new_matcher) == len(matcher)
 
 
+@pytest.mark.issue(3277)
 def test_issue3277(es_tokenizer):
     """Test that hyphens are split correctly as prefixes."""
     doc = es_tokenizer("—Yo me llamo... –murmuró el niño– Emilio Sánchez Pérez.")
@@ -134,6 +142,7 @@ def test_issue3277(es_tokenizer):
     assert doc[9].text == "\u2013"
 
 
+@pytest.mark.issue(3288)
 def test_issue3288(en_vocab):
     """Test that retokenization works correctly via displaCy when punctuation
     is merged onto the preceeding token and tensor is resized."""
@@ -145,6 +154,7 @@ def test_issue3288(en_vocab):
     displacy.render(doc)
 
 
+@pytest.mark.issue(3289)
 def test_issue3289():
     """Test that Language.to_bytes handles serializing a pipeline component
     with an uninitialized model."""
@@ -156,6 +166,7 @@ def test_issue3289():
     new_nlp.from_bytes(bytes_data)
 
 
+@pytest.mark.issue(3328)
 def test_issue3328(en_vocab):
     doc = Doc(en_vocab, words=["Hello", ",", "how", "are", "you", "doing", "?"])
     matcher = Matcher(en_vocab)
@@ -170,6 +181,7 @@ def test_issue3328(en_vocab):
     assert matched_texts == ["Hello", "how", "you", "doing"]
 
 
+@pytest.mark.issue(3331)
 def test_issue3331(en_vocab):
     """Test that duplicate patterns for different rules result in multiple
     matches, one per rule.
@@ -184,6 +196,7 @@ def test_issue3331(en_vocab):
     assert sorted(match_ids) == ["A", "B"]
 
 
+@pytest.mark.issue(3345)
 def test_issue3345():
     """Test case where preset entity crosses sentence boundary."""
     nlp = English()
@@ -206,6 +219,7 @@ def test_issue3345():
     assert ner.moves.is_valid(state, "B-GPE")
 
 
+@pytest.mark.issue(3412)
 def test_issue3412():
     data = numpy.asarray([[0, 0, 0], [1, 2, 3], [9, 8, 7]], dtype="f")
     vectors = Vectors(data=data, keys=["A", "B", "C"])
@@ -216,6 +230,7 @@ def test_issue3412():
 
 
 @pytest.mark.skip(reason="default suffix rules avoid one upper-case letter before dot")
+@pytest.mark.issue(3449)
 def test_issue3449():
     nlp = English()
     nlp.add_pipe("sentencizer")
@@ -230,6 +245,7 @@ def test_issue3449():
     assert t3[5].text == "I"
 
 
+@pytest.mark.issue(3456)
 def test_issue3456():
     # this crashed because of a padding error in layer.ops.unflatten in thinc
     nlp = English()
@@ -239,6 +255,7 @@ def test_issue3456():
     list(nlp.pipe(["hi", ""]))
 
 
+@pytest.mark.issue(3468)
 def test_issue3468():
     """Test that sentence boundaries are set correctly so Doc.has_annotation("SENT_START") can
     be restored after serialization."""
@@ -24,6 +24,7 @@ from ..util import make_tempdir
 
 
 @pytest.mark.parametrize("word", ["don't", "don’t", "I'd", "I’d"])
+@pytest.mark.issue(3521)
 def test_issue3521(en_tokenizer, word):
     tok = en_tokenizer(word)[1]
     # 'not' and 'would' should be stopwords, also in their abbreviated forms
@@ -108,6 +109,7 @@ def test_issue_3526_4(en_vocab):
         assert new_ruler.overwrite is True
 
 
+@pytest.mark.issue(3531)
 def test_issue3531():
     """Test that displaCy renderer doesn't require "settings" key."""
     example_dep = {
@@ -137,6 +139,7 @@ def test_issue3531():
     assert ent_html
 
 
+@pytest.mark.issue(3540)
 def test_issue3540(en_vocab):
     words = ["I", "live", "in", "NewYork", "right", "now"]
     tensor = numpy.asarray(
@@ -176,6 +179,7 @@ def test_issue3540(en_vocab):
     assert vectors_1[5].tolist() == vectors_2[6].tolist()
 
 
+@pytest.mark.issue(3549)
 def test_issue3549(en_vocab):
     """Test that match pattern validation doesn't raise on empty errors."""
     matcher = Matcher(en_vocab, validate=True)
@@ -186,6 +190,7 @@ def test_issue3549(en_vocab):
 
 
 @pytest.mark.skip("Matching currently only works on strings and integers")
+@pytest.mark.issue(3555)
 def test_issue3555(en_vocab):
     """Test that custom extensions with default None don't break matcher."""
     Token.set_extension("issue3555", default=None)
@@ -196,6 +201,7 @@ def test_issue3555(en_vocab):
     matcher(doc)
 
 
+@pytest.mark.issue(3611)
 def test_issue3611():
     """Test whether adding n-grams in the textcat works even when n > token length of some docs"""
     unique_classes = ["offensive", "inoffensive"]
@@ -232,6 +238,7 @@ def test_issue3611():
             nlp.update(examples=batch, sgd=optimizer, drop=0.1, losses=losses)
 
 
+@pytest.mark.issue(3625)
 def test_issue3625():
     """Test that default punctuation rules applies to hindi unicode characters"""
     nlp = Hindi()
@@ -240,6 +247,7 @@ def test_issue3625():
     assert [token.text for token in doc] == expected
 
 
+@pytest.mark.issue(3803)
 def test_issue3803():
     """Test that spanish num-like tokens have True for like_num attribute."""
     nlp = Spanish()
@@ -255,6 +263,7 @@ def _parser_example(parser):
     return Example.from_dict(doc, gold)
 
 
+@pytest.mark.issue(3830)
 def test_issue3830_no_subtok():
     """Test that the parser doesn't have subtok label if not learn_tokens"""
     config = {
@@ -268,6 +277,7 @@ def test_issue3830_no_subtok():
     assert "subtok" not in parser.labels
 
 
+@pytest.mark.issue(3830)
 def test_issue3830_with_subtok():
     """Test that the parser does have subtok label if learn_tokens=True."""
     config = {
@@ -281,6 +291,7 @@ def test_issue3830_with_subtok():
     assert "subtok" in parser.labels
 
 
+@pytest.mark.issue(3839)
 def test_issue3839(en_vocab):
     """Test that match IDs returned by the matcher are correct, are in the string"""
     doc = Doc(en_vocab, words=["terrific", "group", "of", "people"])
@@ -307,6 +318,7 @@ def test_issue3839(en_vocab):
         "It was a missed assignment, but it shouldn't have resulted in a turnover ...",
     ],
 )
+@pytest.mark.issue(3869)
 def test_issue3869(sentence):
     """Test that the Doc's count_by function works consistently"""
     nlp = English()
@@ -317,6 +329,7 @@ def test_issue3869(sentence):
     assert count == doc.count_by(IS_ALPHA).get(1, 0)
 
 
+@pytest.mark.issue(3879)
 def test_issue3879(en_vocab):
     doc = Doc(en_vocab, words=["This", "is", "a", "test", "."])
     assert len(doc) == 5
@@ -326,6 +339,7 @@ def test_issue3879(en_vocab):
     assert len(matcher(doc)) == 2  # fails because of a FP match 'is a test'
 
 
+@pytest.mark.issue(3880)
 def test_issue3880():
     """Test that `nlp.pipe()` works when an empty string ends the batch.
 
@@ -341,6 +355,7 @@ def test_issue3880():
         pass
 
 
+@pytest.mark.issue(3882)
 def test_issue3882(en_vocab):
     """Test that displaCy doesn't serialize the doc.user_data when making a
     copy of the Doc.
@@ -350,6 +365,7 @@ def test_issue3882(en_vocab):
     parse_deps(doc)
 
 
+@pytest.mark.issue(3951)
 def test_issue3951(en_vocab):
     """Test that combinations of optional rules are matched correctly."""
     matcher = Matcher(en_vocab)
@@ -365,6 +381,7 @@ def test_issue3951(en_vocab):
     assert len(matches) == 0
 
 
+@pytest.mark.issue(3959)
 def test_issue3959():
     """Ensure that a modified pos attribute is serialized correctly."""
     nlp = English()
@@ -383,6 +400,7 @@ def test_issue3959():
     assert doc2[0].pos_ == "NOUN"
 
 
+@pytest.mark.issue(3962)
 def test_issue3962(en_vocab):
     """Ensure that as_doc does not result in out-of-bound access of tokens.
This is achieved by setting the head to itself if it would lie out of the span otherwise."""
|
This is achieved by setting the head to itself if it would lie out of the span otherwise."""
|
||||||
|
@ -421,6 +439,7 @@ def test_issue3962(en_vocab):
|
||||||
assert len(list(doc3.sents)) == 1
|
assert len(list(doc3.sents)) == 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.issue(3962)
|
||||||
def test_issue3962_long(en_vocab):
|
def test_issue3962_long(en_vocab):
|
||||||
"""Ensure that as_doc does not result in out-of-bound access of tokens.
|
"""Ensure that as_doc does not result in out-of-bound access of tokens.
|
||||||
This is achieved by setting the head to itself if it would lie out of the span otherwise."""
|
This is achieved by setting the head to itself if it would lie out of the span otherwise."""
|
||||||
|
@ -456,6 +475,7 @@ def test_issue3962_long(en_vocab):
|
||||||
assert sents[1].text == "They never"
|
assert sents[1].text == "They never"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.issue(3972)
|
||||||
def test_issue3972(en_vocab):
|
def test_issue3972(en_vocab):
|
||||||
"""Test that the PhraseMatcher returns duplicates for duplicate match IDs."""
|
"""Test that the PhraseMatcher returns duplicates for duplicate match IDs."""
|
||||||
matcher = PhraseMatcher(en_vocab)
|
matcher = PhraseMatcher(en_vocab)
|
||||||
|
|
|
@@ -17,6 +17,7 @@ from thinc.api import compounding
from ..util import make_tempdir


+@pytest.mark.issue(4002)
def test_issue4002(en_vocab):
    """Test that the PhraseMatcher can match on overwritten NORM attributes."""
    matcher = PhraseMatcher(en_vocab, attr="NORM")
@@ -37,6 +38,7 @@ def test_issue4002(en_vocab):
    assert len(matches) == 1


+@pytest.mark.issue(4030)
def test_issue4030():
    """Test whether textcat works fine with empty doc"""
    unique_classes = ["offensive", "inoffensive"]
@@ -77,6 +79,7 @@ def test_issue4030():
    assert doc.cats["inoffensive"] == 0.0


+@pytest.mark.issue(4042)
def test_issue4042():
    """Test that serialization of an EntityRuler before NER works fine."""
    nlp = English()
@@ -105,6 +108,7 @@ def test_issue4042():
        assert doc2.ents[0].label_ == "MY_ORG"


+@pytest.mark.issue(4042)
def test_issue4042_bug2():
    """
    Test that serialization of an NER works fine when new labels were added.
@@ -139,6 +143,7 @@ def test_issue4042_bug2():
        assert len(ner2.labels) == 2


+@pytest.mark.issue(4054)
def test_issue4054(en_vocab):
    """Test that a new blank model can be made with a vocab from file,
    and that serialization does not drop the language at any point."""
@@ -159,6 +164,7 @@ def test_issue4054(en_vocab):
    assert nlp3.lang == "en"


+@pytest.mark.issue(4120)
def test_issue4120(en_vocab):
    """Test that matches without a final {OP: ?} token are returned."""
    matcher = Matcher(en_vocab)
@@ -177,6 +183,7 @@ def test_issue4120(en_vocab):
    assert len(matcher(doc4)) == 3  # fixed


+@pytest.mark.issue(4133)
def test_issue4133(en_vocab):
    nlp = English()
    vocab_bytes = nlp.vocab.to_bytes()
@@ -196,6 +203,7 @@ def test_issue4133(en_vocab):
        assert actual == pos


+@pytest.mark.issue(4190)
def test_issue4190():
    def customize_tokenizer(nlp):
        prefix_re = compile_prefix_regex(nlp.Defaults.prefixes)
@@ -236,6 +244,7 @@ def test_issue4190():
    assert result_1b == result_2


+@pytest.mark.issue(4267)
def test_issue4267():
    """Test that running an entity_ruler after ner gives consistent results"""
    nlp = English()
@@ -262,6 +271,7 @@ def test_issue4267():


@pytest.mark.skip(reason="lemmatizer lookups no longer in vocab")
+@pytest.mark.issue(4272)
def test_issue4272():
    """Test that lookup table can be accessed from Token.lemma if no POS tags
    are available."""
@@ -287,6 +297,7 @@ def test_multiple_predictions():
    dummy_pipe(doc)


+@pytest.mark.issue(4313)
def test_issue4313():
    """This should not crash or exit with some strange error code"""
    beam_width = 16
@@ -313,6 +324,7 @@ def test_issue4313():
    assert "MY_ORG" in ner.labels


+@pytest.mark.issue(4348)
def test_issue4348():
    """Test that training the tagger with empty data, doesn't throw errors"""
    nlp = English()
@@ -328,6 +340,7 @@ def test_issue4348():
        nlp.update(batch, sgd=optimizer, losses=losses)


+@pytest.mark.issue(4367)
def test_issue4367():
    """Test that docbin init goes well"""
    DocBin()
@@ -335,6 +348,7 @@ def test_issue4367():
    DocBin(attrs=["LEMMA", "ENT_IOB", "ENT_TYPE"])


+@pytest.mark.issue(4373)
def test_issue4373():
    """Test that PhraseMatcher.vocab can be accessed (like Matcher.vocab)."""
    matcher = Matcher(Vocab())
@@ -343,6 +357,7 @@ def test_issue4373():
    assert isinstance(matcher.vocab, Vocab)


+@pytest.mark.issue(4402)
def test_issue4402():
    json_data = {
        "id": 0,

@@ -14,6 +14,7 @@ from thinc.api import NumpyOps, get_current_ops
from ..util import make_tempdir


+@pytest.mark.issue(4528)
def test_issue4528(en_vocab):
    """Test that user_data is correctly serialized in DocBin."""
    doc = Doc(en_vocab, words=["hello", "world"])
@@ -37,6 +38,7 @@ def test_gold_misaligned(en_tokenizer, text, words):
    Example.from_dict(doc, {"words": words})


+@pytest.mark.issue(4651)
def test_issue4651_with_phrase_matcher_attr():
    """Test that the EntityRuler PhraseMatcher is deserialized correctly using
    the method from_disk when the EntityRuler argument phrase_matcher_attr is
@@ -59,6 +61,7 @@ def test_issue4651_with_phrase_matcher_attr():
    assert res == res_reloaded


+@pytest.mark.issue(4651)
def test_issue4651_without_phrase_matcher_attr():
    """Test that the EntityRuler PhraseMatcher is deserialized correctly using
    the method from_disk when the EntityRuler argument phrase_matcher_attr is
@@ -81,6 +84,7 @@ def test_issue4651_without_phrase_matcher_attr():
    assert res == res_reloaded


+@pytest.mark.issue(4665)
def test_issue4665():
    """
    conllu_to_docs should not raise an exception if the HEAD column contains an
@@ -109,6 +113,7 @@ def test_issue4665():
    conllu_to_docs(input_data)


+@pytest.mark.issue(4674)
def test_issue4674():
    """Test that setting entities with overlapping identifiers does not mess up IO"""
    nlp = English()
@@ -135,6 +140,7 @@ def test_issue4674():


@pytest.mark.skip(reason="API change: disable just disables, new exclude arg")
+@pytest.mark.issue(4707)
def test_issue4707():
    """Tests that disabled component names are also excluded from nlp.from_disk
    by default when loading a model.
@@ -151,6 +157,7 @@ def test_issue4707():
    assert "entity_ruler" in new_nlp.pipe_names


+@pytest.mark.issue(4725)
def test_issue4725_1():
    """Ensure the pickling of the NER goes well"""
    vocab = Vocab(vectors_name="test_vocab_add_vector")
@@ -169,6 +176,7 @@ def test_issue4725_1():
    assert ner2.cfg["update_with_oracle_cut_size"] == 111


+@pytest.mark.issue(4725)
def test_issue4725_2():
    if isinstance(get_current_ops, NumpyOps):
        # ensures that this runs correctly and doesn't hang or crash because of the global vectors
@@ -188,6 +196,7 @@ def test_issue4725_2():
            pass


+@pytest.mark.issue(4849)
def test_issue4849():
    nlp = English()
    patterns = [
@@ -235,6 +244,7 @@ class CustomPipe:
        return str(span.end)


+@pytest.mark.issue(4903)
def test_issue4903():
    """Ensure that this runs correctly and doesn't hang or crash on Windows /
    macOS."""
@@ -249,6 +259,7 @@ def test_issue4903():
        assert docs[2].text == "No, I prefer wasabi."


+@pytest.mark.issue(4924)
def test_issue4924():
    nlp = Language()
    example = Example.from_dict(nlp.make_doc(""), {})

@@ -12,6 +12,7 @@ import pytest
from ...util import make_tempdir


+@pytest.mark.issue(5048)
def test_issue5048(en_vocab):
    words = ["This", "is", "a", "sentence"]
    pos_s = ["DET", "VERB", "DET", "NOUN"]
@@ -34,6 +35,7 @@ def test_issue5048(en_vocab):
    assert v1 == v2


+@pytest.mark.issue(5082)
def test_issue5082():
    # Ensure the 'merge_entities' pipeline does something sensible for the vectors of the merged tokens
    nlp = English()
@@ -68,6 +70,7 @@ def test_issue5082():
    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_2[2]), array34)


+@pytest.mark.issue(5137)
def test_issue5137():
    factory_name = "test_issue5137"
    pipe_name = "my_component"
@@ -98,6 +101,7 @@ def test_issue5137():
        assert nlp2.get_pipe(pipe_name).categories == "my_categories"


+@pytest.mark.issue(5141)
def test_issue5141(en_vocab):
    """Ensure an empty DocBin does not crash on serialization"""
    doc_bin = DocBin(attrs=["DEP", "HEAD"])
@@ -107,6 +111,7 @@ def test_issue5141(en_vocab):
    assert list(doc_bin_2.get_docs(en_vocab)) == []


+@pytest.mark.issue(5152)
def test_issue5152():
    # Test that the comparison between a Span and a Token, goes well
    # There was a bug when the number of tokens in the span equaled the number of characters in the token (!)
@@ -125,6 +130,7 @@ def test_issue5152():
    assert span_2.similarity(span_3) < 1.0


+@pytest.mark.issue(5458)
def test_issue5458():
    # Test that the noun chuncker does not generate overlapping spans
    # fmt: off

@@ -25,6 +25,7 @@ from spacy.training import Example
        multi_label_cnn_config,
    ],
)
+@pytest.mark.issue(5551)
def test_issue5551(textcat_config):
    """Test that after fixing the random seed, the results of the pipeline are truly identical"""
    component = "textcat"
@@ -53,6 +54,7 @@ def test_issue5551(textcat_config):
    assert_almost_equal(ops.to_numpy(results[0]), ops.to_numpy(results[2]), decimal=5)


+@pytest.mark.issue(5838)
def test_issue5838():
    # Displacy's EntityRenderer break line
    # not working after last entity
@@ -65,6 +67,7 @@ def test_issue5838():
    assert found == 4


+@pytest.mark.issue(5918)
def test_issue5918():
    # Test edge case when merging entities.
    nlp = English()

@@ -4,6 +4,7 @@ from spacy.schemas import TokenPattern, TokenPatternSchema
import pytest


+@pytest.mark.issue(6207)
def test_issue6207(en_tokenizer):
    doc = en_tokenizer("zero one two three four five six")

@@ -18,6 +19,7 @@ def test_issue6207(en_tokenizer):
    assert s3 in result


+@pytest.mark.issue(6258)
def test_issue6258():
    """Test that the non-empty constraint pattern field is respected"""
    # These one is valid

@@ -13,6 +13,7 @@ import pickle
from ..util import make_tempdir


+@pytest.mark.issue(6730)
def test_issue6730(en_vocab):
    """Ensure that the KB does not accept empty strings, but otherwise IO works fine."""
    from spacy.kb import KnowledgeBase
@@ -34,6 +35,7 @@ def test_issue6730(en_vocab):
        assert set(kb.get_alias_strings()) == {"x", "y"}


+@pytest.mark.issue(6755)
def test_issue6755(en_tokenizer):
    doc = en_tokenizer("This is a magnificent sentence.")
    span = doc[:0]
@@ -45,6 +47,7 @@ def test_issue6755(en_tokenizer):
    "sentence, start_idx,end_idx,label",
    [("Welcome to Mumbai, my friend", 11, 17, "GPE")],
)
+@pytest.mark.issue(6815)
def test_issue6815_1(sentence, start_idx, end_idx, label):
    nlp = English()
    doc = nlp(sentence)
@@ -55,6 +58,7 @@ def test_issue6815_1(sentence, start_idx, end_idx, label):
@pytest.mark.parametrize(
    "sentence, start_idx,end_idx,kb_id", [("Welcome to Mumbai, my friend", 11, 17, 5)]
)
+@pytest.mark.issue(6815)
def test_issue6815_2(sentence, start_idx, end_idx, kb_id):
    nlp = English()
    doc = nlp(sentence)
@@ -66,6 +70,7 @@ def test_issue6815_2(sentence, start_idx, end_idx, kb_id):
    "sentence, start_idx,end_idx,vector",
    [("Welcome to Mumbai, my friend", 11, 17, np.array([0.1, 0.2, 0.3]))],
)
+@pytest.mark.issue(6815)
def test_issue6815_3(sentence, start_idx, end_idx, vector):
    nlp = English()
    doc = nlp(sentence)
@@ -73,6 +78,7 @@ def test_issue6815_3(sentence, start_idx, end_idx, vector):
    assert (span.vector == vector).all()


+@pytest.mark.issue(6839)
def test_issue6839(en_vocab):
    """Ensure that PhraseMatcher accepts Span as input"""
    # fmt: off
@@ -155,6 +161,7 @@ labels = ['label1', 'label2']
    "component_name",
    ["textcat", "textcat_multilabel"],
)
+@pytest.mark.issue(6908)
def test_issue6908(component_name):
    """Test intializing textcat with labels in a list"""

@@ -219,6 +226,7 @@ upstream = "*"
"""


+@pytest.mark.issue(6950)
def test_issue6950():
    """Test that the nlp object with initialized tok2vec with listeners pickles
    correctly (and doesn't have lambdas).

@@ -13,6 +13,7 @@ from wasabi import msg
from ..util import make_tempdir


+@pytest.mark.issue(7019)
def test_issue7019():
    scores = {"LABEL_A": 0.39829102, "LABEL_B": 0.938298329382, "LABEL_C": None}
    print_textcats_auc_per_cat(msg, scores)
@@ -64,6 +65,7 @@ upstream = "*"
"""


+@pytest.mark.issue(7029)
def test_issue7029():
    """Test that an empty document doesn't mess up an entire batch."""
    TRAIN_DATA = [
@@ -84,6 +86,7 @@ def test_issue7029():
    assert [doc[0].tag_ for doc in docs1[:-1]] == [doc[0].tag_ for doc in docs2[:-1]]


+@pytest.mark.issue(7055)
def test_issue7055():
    """Test that fill-config doesn't turn sourced components into factories."""
    source_cfg = {
@@ -118,6 +121,7 @@ def test_issue7055():
    assert "model" in filled_cfg["components"]["ner"]


+@pytest.mark.issue(7056)
def test_issue7056():
    """Test that the Unshift transition works properly, and doesn't cause
    sentence segmentation errors."""
@@ -190,6 +194,7 @@ def test_partial_links():
    assert "ORG" not in results["nel_f_per_type"]


+@pytest.mark.issue(7065)
def test_issue7065():
    text = "Kathleen Battle sang in Mahler 's Symphony No. 8 at the Cincinnati Symphony Orchestra 's May Festival."
    nlp = English()
@@ -217,6 +222,7 @@ def test_issue7065():
    assert sentences.index(ent.sent) == 0


+@pytest.mark.issue(7065)
def test_issue7065_b():
    # Test that the NEL doesn't crash when an entity crosses a sentence boundary
    nlp = English()

@@ -43,6 +43,7 @@ def parser(vocab):
    return parser


+@pytest.mark.issue(7716)
@pytest.mark.xfail(reason="Not fixed yet")
def test_partial_annotation(parser):
    doc = Doc(parser.vocab, words=["a", "b", "c", "d"])

@@ -3,6 +3,7 @@ from spacy.lang.en import English
from ..util import make_tempdir


+@pytest.mark.issue(8190)
def test_issue8190():
    """Test that config overrides are not lost after load is complete."""
    source_cfg = {

@@ -22,6 +22,7 @@ def patterns():
    ]


+@pytest.mark.issue(8216)
def test_entity_ruler_fix8216(nlp, patterns):
    """Test that patterns don't get added excessively."""
    ruler = nlp.add_pipe("entity_ruler", config={"validate": True})
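
The changes above are mechanical: every regression test gains a @pytest.mark.issue(<number>) decorator so it can be traced back to the GitHub issue it covers. As a minimal sketch of how such a custom marker is typically wired up, the conftest.py below registers the marker and adds a command-line option for filtering by issue number; the --issue option, the hook bodies, and their placement are illustrative assumptions, not code from this commit or from spaCy's test suite.

# conftest.py (hypothetical sketch, not part of this commit)


def pytest_configure(config):
    # Register the custom marker so pytest does not warn about unknown markers.
    config.addinivalue_line(
        "markers", "issue(number): mark a regression test with the issue it covers"
    )


def pytest_addoption(parser):
    # Hypothetical option: limit the run to tests marked with one issue number.
    parser.addoption(
        "--issue", type=int, default=None, help="run only tests for this issue number"
    )


def pytest_collection_modifyitems(config, items):
    # Deselect every test whose issue marker does not carry the requested number.
    issue = config.getoption("--issue")
    if issue is None:
        return
    selected, deselected = [], []
    for item in items:
        marker = item.get_closest_marker("issue")
        if marker is not None and issue in marker.args:
            selected.append(item)
        else:
            deselected.append(item)
    if deselected:
        config.hook.pytest_deselected(items=deselected)
    items[:] = selected

With a setup like this, pytest -m issue would select every decorated test, and the hypothetical pytest --issue 3555 would narrow the run to the tests marked for that one issue.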