Mirror of https://github.com/explosion/spaCy.git (synced 2025-01-12 02:06:31 +03:00)
Auto-format code with black

parent c5c4e96597
commit ee37288a1f
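The hunks below all apply the same change: black's docstring normalization strips the stray spaces just inside the triple quotes. As a minimal sketch (not part of this commit; it assumes the black package is installed, and the exact black version and configuration used for this repo are not shown here), recent black releases apply this normalization programmatically:

import black

# Hypothetical input resembling the pre-commit style, with extra spaces
# inside the docstring delimiters.
src = '''
def test_example():
    """ Ensure an empty DocBin does not crash on serialization """
    pass
'''

# format_str applies the same rewriting that running black on the files would;
# recent releases emit the docstring as
#     """Ensure an empty DocBin does not crash on serialization"""
formatted = black.format_str(src, mode=black.FileMode())
print(formatted)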
@@ -255,7 +255,7 @@ def test_token_api_non_conjuncts(en_vocab):
 
 
 def test_missing_head_dep(en_vocab):
-    """ Check that the Doc constructor and Example.from_dict parse missing information the same"""
+    """Check that the Doc constructor and Example.from_dict parse missing information the same"""
     heads = [1, 1, 1, 1, 2, None]  # element 5 is missing
     deps = ["", "ROOT", "dobj", "cc", "conj", None]  # element 0 and 5 are missing
     words = ["I", "like", "London", "and", "Berlin", "."]
@@ -5,7 +5,7 @@ import pytest
     "text,expected_tokens", [("d'un", ["d'", "un"]), ("s'ha", ["s'", "ha"])]
 )
 def test_contractions(ca_tokenizer, text, expected_tokens):
-    """ Test that the contractions are split into two tokens"""
+    """Test that the contractions are split into two tokens"""
     tokens = ca_tokenizer(text)
     assert len(tokens) == 2
     assert [t.text for t in tokens] == expected_tokens
@@ -5,7 +5,7 @@ import pytest
     "text,expected_tokens", [("c'è", ["c'", "è"]), ("l'ha", ["l'", "ha"])]
 )
 def test_contractions(it_tokenizer, text, expected_tokens):
-    """ Test that the contractions are split into two tokens"""
+    """Test that the contractions are split into two tokens"""
    tokens = it_tokenizer(text)
     assert len(tokens) == 2
     assert [t.text for t in tokens] == expected_tokens
@@ -304,7 +304,7 @@ def test_empty_ner():
 
 
 def test_ruler_before_ner():
-    """ Test that an NER works after an entity_ruler: the second can add annotations """
+    """Test that an NER works after an entity_ruler: the second can add annotations"""
     nlp = English()
 
     # 1 : Entity Ruler - should set "this" to B and everything else to empty
@@ -334,7 +334,7 @@ def test_ner_constructor(en_vocab):
 
 
 def test_ner_before_ruler():
-    """ Test that an entity_ruler works after an NER: the second can overwrite O annotations """
+    """Test that an entity_ruler works after an NER: the second can overwrite O annotations"""
     nlp = English()
 
     # 1: untrained NER - should set everything to O
@@ -355,7 +355,7 @@ def test_ner_before_ruler():
 
 
 def test_block_ner():
-    """ Test functionality for blocking tokens so they can't be in a named entity """
+    """Test functionality for blocking tokens so they can't be in a named entity"""
     # block "Antti L Korhonen" from being a named entity
     nlp = English()
     nlp.add_pipe("blocker", config={"start": 2, "end": 5})
@@ -197,7 +197,7 @@ def test_issue3555(en_vocab):
 
 
 def test_issue3611():
-    """ Test whether adding n-grams in the textcat works even when n > token length of some docs """
+    """Test whether adding n-grams in the textcat works even when n > token length of some docs"""
     unique_classes = ["offensive", "inoffensive"]
     x_train = [
         "This is an offensive text",
@@ -282,7 +282,7 @@ def test_issue3830_with_subtok():
 
 
 def test_issue3839(en_vocab):
-    """Test that match IDs returned by the matcher are correct, are in the string """
+    """Test that match IDs returned by the matcher are correct, are in the string"""
     doc = Doc(en_vocab, words=["terrific", "group", "of", "people"])
     matcher = Matcher(en_vocab)
     match_id = "PATTERN"
@@ -366,7 +366,7 @@ def test_issue3951(en_vocab):
 
 
 def test_issue3959():
-    """ Ensure that a modified pos attribute is serialized correctly."""
+    """Ensure that a modified pos attribute is serialized correctly."""
     nlp = English()
     doc = nlp(
         "displaCy uses JavaScript, SVG and CSS to show you how computers understand language"
@@ -38,7 +38,7 @@ def test_issue4002(en_vocab):
 
 
 def test_issue4030():
-    """ Test whether textcat works fine with empty doc """
+    """Test whether textcat works fine with empty doc"""
     unique_classes = ["offensive", "inoffensive"]
     x_train = [
         "This is an offensive text",
@@ -237,7 +237,7 @@ def test_issue4190():
 
 
 def test_issue4267():
-    """ Test that running an entity_ruler after ner gives consistent results"""
+    """Test that running an entity_ruler after ner gives consistent results"""
     nlp = English()
     ner = nlp.add_pipe("ner")
     ner.add_label("PEOPLE")
@@ -288,7 +288,7 @@ def test_multiple_predictions():
 
 
 def test_issue4313():
-    """ This should not crash or exit with some strange error code """
+    """This should not crash or exit with some strange error code"""
     beam_width = 16
     beam_density = 0.0001
     nlp = English()
@@ -152,7 +152,7 @@ def test_issue4707():
 
 
 def test_issue4725_1():
-    """ Ensure the pickling of the NER goes well"""
+    """Ensure the pickling of the NER goes well"""
     vocab = Vocab(vectors_name="test_vocab_add_vector")
     nlp = English(vocab=vocab)
     config = {
@@ -96,7 +96,7 @@ def test_issue5137():
 
 
 def test_issue5141(en_vocab):
-    """ Ensure an empty DocBin does not crash on serialization """
+    """Ensure an empty DocBin does not crash on serialization"""
     doc_bin = DocBin(attrs=["DEP", "HEAD"])
     assert list(doc_bin.get_docs(en_vocab)) == []
     doc_bin_bytes = doc_bin.to_bytes()
@@ -238,7 +238,7 @@ def test_create_nlp_from_config_multiple_instances():
 
 
 def test_serialize_nlp():
-    """ Create a custom nlp pipeline from config and ensure it serializes it correctly """
+    """Create a custom nlp pipeline from config and ensure it serializes it correctly"""
     nlp_config = Config().from_str(nlp_config_string)
     nlp = load_model_from_config(nlp_config, auto_fill=True)
     nlp.get_pipe("tagger").add_label("A")
@@ -258,7 +258,7 @@ def test_serialize_nlp():
 
 
 def test_serialize_custom_nlp():
-    """ Create a custom nlp pipeline and ensure it serializes it correctly"""
+    """Create a custom nlp pipeline and ensure it serializes it correctly"""
     nlp = English()
     parser_cfg = dict()
     parser_cfg["model"] = {"@architectures": "my_test_parser"}
@@ -279,7 +279,7 @@ def test_serialize_custom_nlp():
     "parser_config_string", [parser_config_string_upper, parser_config_string_no_upper]
 )
 def test_serialize_parser(parser_config_string):
-    """ Create a non-default parser config to check nlp serializes it correctly """
+    """Create a non-default parser config to check nlp serializes it correctly"""
     nlp = English()
     model_config = Config().from_str(parser_config_string)
     parser = nlp.add_pipe("parser", config=model_config)
@@ -275,7 +275,7 @@ def test_util_minibatch(doc_sizes, expected_batches):
     ],
 )
 def test_util_minibatch_oversize(doc_sizes, expected_batches):
-    """ Test that oversized documents are returned in their own batch"""
+    """Test that oversized documents are returned in their own batch"""
     docs = [get_random_doc(doc_size) for doc_size in doc_sizes]
     tol = 0.2
     batch_size = 1000
@@ -69,7 +69,7 @@ def read_conllx(
     ner_tag_pattern="",
     ner_map=None,
 ):
-    """ Yield docs, one for each sentence """
+    """Yield docs, one for each sentence"""
     vocab = Vocab()  # need vocab to make a minimal Doc
     for sent in input_data.strip().split("\n\n"):
         lines = sent.strip().split("\n")
@@ -186,7 +186,7 @@ class Corpus:
     def read_docbin(
         self, vocab: Vocab, locs: Iterable[Union[str, Path]]
     ) -> Iterator[Doc]:
-        """ Yield training examples as example dicts """
+        """Yield training examples as example dicts"""
         i = 0
         for loc in locs:
             loc = util.ensure_path(loc)
@@ -110,6 +110,7 @@ def wandb_logger(
 ):
     try:
         import wandb
+
         # test that these are available
         from wandb import init, log, join  # noqa: F401
     except ImportError: