From fa47f87924c1c9cfcc30ade50933488bcd62c423 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 29 Sep 2020 21:39:28 +0200 Subject: [PATCH] Tidy up and auto-format --- spacy/cli/convert.py | 3 ++- spacy/cli/debug_data.py | 2 +- spacy/cli/project/dvc.py | 2 +- spacy/lang/en/lemmatizer.py | 3 +-- spacy/lang/es/syntax_iterators.py | 2 +- spacy/lang/sa/lex_attrs.py | 4 ++-- spacy/lang/vi/__init__.py | 2 +- spacy/language.py | 2 +- spacy/pipe_analysis.py | 2 +- spacy/pipeline/attributeruler.py | 7 +++--- spacy/pipeline/lemmatizer.py | 2 +- spacy/pipeline/textcat.py | 2 +- spacy/schemas.py | 2 +- spacy/scorer.py | 7 ++---- spacy/tests/doc/test_doc_api.py | 2 +- spacy/tests/lang/de/test_noun_chunks.py | 3 +-- spacy/tests/lang/el/test_noun_chunks.py | 3 +-- spacy/tests/lang/en/test_noun_chunks.py | 3 +-- spacy/tests/lang/es/test_noun_chunks.py | 3 +-- spacy/tests/lang/fa/test_noun_chunks.py | 3 +-- spacy/tests/lang/fr/test_exceptions.py | 4 +--- spacy/tests/lang/fr/test_noun_chunks.py | 3 +-- spacy/tests/lang/id/test_noun_chunks.py | 3 +-- spacy/tests/lang/ja/test_tokenizer.py | 2 +- spacy/tests/lang/nb/test_noun_chunks.py | 3 +-- spacy/tests/lang/ne/test_text.py | 2 +- spacy/tests/lang/sa/test_text.py | 2 +- spacy/tests/lang/sv/test_noun_chunks.py | 3 +-- spacy/tests/pipeline/test_entity_linker.py | 8 +++---- spacy/tests/pipeline/test_textcat.py | 23 +++++++++++-------- spacy/tests/pipeline/test_tok2vec.py | 3 +-- spacy/tests/regression/test_issue3501-4000.py | 11 ++++----- spacy/tests/regression/test_issue4001-4500.py | 7 ++---- spacy/tests/test_cli.py | 2 +- spacy/tests/test_language.py | 4 +--- spacy/tests/test_models.py | 2 +- spacy/tests/test_scorer.py | 2 +- spacy/tests/training/test_training.py | 6 +++-- spacy/training/augment.py | 6 ++--- spacy/training/initialize.py | 4 ++-- spacy/training/pretrain.py | 2 +- spacy/util.py | 2 +- 42 files changed, 71 insertions(+), 92 deletions(-) diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py index 3fc530822..e4559929e 100644 --- a/spacy/cli/convert.py +++ b/spacy/cli/convert.py @@ -9,7 +9,8 @@ import sys from ._util import app, Arg, Opt from ..training import docs_to_json from ..tokens import DocBin -from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs, conllu_to_docs +from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs +from ..training.converters import conllu_to_docs # Converters are matched by file extension except for ner/iob, which are diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index c4d1069c0..b4c420660 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -27,7 +27,7 @@ BLANK_MODEL_THRESHOLD = 2000 @debug_cli.command( - "data", context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, + "data", context_settings={"allow_extra_args": True, "ignore_unknown_options": True} ) @app.command( "debug-data", diff --git a/spacy/cli/project/dvc.py b/spacy/cli/project/dvc.py index 541253234..6eedc9c20 100644 --- a/spacy/cli/project/dvc.py +++ b/spacy/cli/project/dvc.py @@ -134,7 +134,7 @@ def update_dvc_config( def run_dvc_commands( - commands: Iterable[str] = SimpleFrozenList(), flags: Dict[str, bool] = {}, + commands: Iterable[str] = SimpleFrozenList(), flags: Dict[str, bool] = {} ) -> None: """Run a sequence of DVC commands in a subprocess, in order. 
diff --git a/spacy/lang/en/lemmatizer.py b/spacy/lang/en/lemmatizer.py index be389f117..2cb0f9a53 100644 --- a/spacy/lang/en/lemmatizer.py +++ b/spacy/lang/en/lemmatizer.py @@ -3,8 +3,7 @@ from ...tokens import Token class EnglishLemmatizer(Lemmatizer): - """English lemmatizer. Only overrides is_base_form. - """ + """English lemmatizer. Only overrides is_base_form.""" def is_base_form(self, token: Token) -> bool: """ diff --git a/spacy/lang/es/syntax_iterators.py b/spacy/lang/es/syntax_iterators.py index ad0a1b838..4dd4f99be 100644 --- a/spacy/lang/es/syntax_iterators.py +++ b/spacy/lang/es/syntax_iterators.py @@ -58,7 +58,7 @@ def noun_bounds( doc, token, np_left_deps, np_right_deps, stop_deps ) filter_func = lambda t: is_verb_token(t) or t.dep in stop_deps - if list(filter(filter_func, doc[left_bound.i : right.i],)): + if list(filter(filter_func, doc[left_bound.i : right.i])): break else: right_bound = right diff --git a/spacy/lang/sa/lex_attrs.py b/spacy/lang/sa/lex_attrs.py index f2b51650b..bdceb7ec2 100644 --- a/spacy/lang/sa/lex_attrs.py +++ b/spacy/lang/sa/lex_attrs.py @@ -108,8 +108,8 @@ _num_words = [ def like_num(text): """ - Check if text resembles a number - """ + Check if text resembles a number + """ if text.startswith(("+", "-", "±", "~")): text = text[1:] text = text.replace(",", "").replace(".", "") diff --git a/spacy/lang/vi/__init__.py b/spacy/lang/vi/__init__.py index 1db762adb..71f51eac6 100644 --- a/spacy/lang/vi/__init__.py +++ b/spacy/lang/vi/__init__.py @@ -17,7 +17,7 @@ use_pyvi = true @registry.tokenizers("spacy.vi.VietnameseTokenizer") -def create_vietnamese_tokenizer(use_pyvi: bool = True,): +def create_vietnamese_tokenizer(use_pyvi: bool = True): def vietnamese_tokenizer_factory(nlp): return VietnameseTokenizer(nlp, use_pyvi=use_pyvi) diff --git a/spacy/language.py b/spacy/language.py index 9591cb61d..14b9f4eb0 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1189,7 +1189,7 @@ class Language: # These are the settings provided in the [initialize] block in the config I = registry.resolve(config["initialize"], schema=ConfigSchemaInit) init_vocab( - self, data=I["vocab_data"], lookups=I["lookups"], vectors=I["vectors"], + self, data=I["vocab_data"], lookups=I["lookups"], vectors=I["vectors"] ) pretrain_cfg = config.get("pretraining") if pretrain_cfg: diff --git a/spacy/pipe_analysis.py b/spacy/pipe_analysis.py index 008ac3384..d0362e7e1 100644 --- a/spacy/pipe_analysis.py +++ b/spacy/pipe_analysis.py @@ -78,7 +78,7 @@ def get_attr_info(nlp: "Language", attr: str) -> Dict[str, List[str]]: def analyze_pipes( - nlp: "Language", *, keys: List[str] = DEFAULT_KEYS, + nlp: "Language", *, keys: List[str] = DEFAULT_KEYS ) -> Dict[str, Union[List[str], Dict[str, List[str]]]]: """Print a formatted summary for the current nlp object's pipeline. 
Shows a table with the pipeline components and why they assign and require, as diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py index 4243ebcfb..f314953e9 100644 --- a/spacy/pipeline/attributeruler.py +++ b/spacy/pipeline/attributeruler.py @@ -82,8 +82,7 @@ class AttributeRuler(Pipe): matches = self.matcher(doc, allow_missing=True) # Sort by the attribute ID, so that later rules have precendence matches = [ - (int(self.vocab.strings[m_id]), m_id, s, e) - for m_id, s, e in matches + (int(self.vocab.strings[m_id]), m_id, s, e) for m_id, s, e in matches ] matches.sort() for attr_id, match_id, start, end in matches: @@ -93,7 +92,7 @@ class AttributeRuler(Pipe): try: # The index can be negative, which makes it annoying to do # the boundscheck. Let Span do it instead. - token = span[index] + token = span[index] # noqa: F841 except IndexError: # The original exception is just our conditional logic, so we # raise from. @@ -103,7 +102,7 @@ class AttributeRuler(Pipe): span=[t.text for t in span], index=index, ) - ) from None + ) from None set_token_attrs(span[index], attrs) return doc diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py index c30d09f62..391769604 100644 --- a/spacy/pipeline/lemmatizer.py +++ b/spacy/pipeline/lemmatizer.py @@ -67,7 +67,7 @@ class Lemmatizer(Pipe): return {} @classmethod - def load_lookups(cls, lang: str, mode: str, lookups: Optional[Lookups],) -> Lookups: + def load_lookups(cls, lang: str, mode: str, lookups: Optional[Lookups]) -> Lookups: """Load and validate lookups tables. If the provided lookups is None, load the default lookups tables according to the language and mode settings. Confirm that all required tables for the language and mode diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index 776b0a178..c5b8b615b 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -347,7 +347,7 @@ class TextCategorizer(Pipe): get_examples: Callable[[], Iterable[Example]], *, nlp: Optional[Language] = None, - labels: Optional[Dict] = None + labels: Optional[Dict] = None, ): """Initialize the pipe for training, using a representative set of data examples. 
diff --git a/spacy/schemas.py b/spacy/schemas.py index d9a31c742..1125fa7da 100644 --- a/spacy/schemas.py +++ b/spacy/schemas.py @@ -132,7 +132,7 @@ def validate_init_settings( block = "initialize" if not section else f"initialize.{section}" title = f"Error validating initialization settings in [{block}]" raise ConfigValidationError( - title=title, errors=e.errors(), config=settings, parent=name, + title=title, errors=e.errors(), config=settings, parent=name ) from None diff --git a/spacy/scorer.py b/spacy/scorer.py index b2f97e163..db32dabae 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -32,9 +32,7 @@ class PRFScore: def __add__(self, other): return PRFScore( - tp=self.tp+other.tp, - fp=self.fp+other.fp, - fn=self.fn+other.fn + tp=self.tp + other.tp, fp=self.fp + other.fp, fn=self.fn + other.fn ) def score_set(self, cand: set, gold: set) -> None: @@ -485,7 +483,7 @@ class Scorer: (pred_ent.start_char, pred_ent.end_char), None ) label = gold_span.label_ - if not label in f_per_type: + if label not in f_per_type: f_per_type[label] = PRFScore() gold = gold_span.kb_id_ # only evaluating entities that overlap between gold and pred, @@ -632,7 +630,6 @@ def get_ner_prf(examples: Iterable[Example]) -> Dict[str, PRFScore]: continue golds = {(e.label_, e.start, e.end) for e in eg.y.ents} align_x2y = eg.alignment.x2y - preds = set() for pred_ent in eg.x.ents: if pred_ent.label_ not in scores: scores[pred_ent.label_] = PRFScore() diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py index e5e72fe2a..b4b853701 100644 --- a/spacy/tests/doc/test_doc_api.py +++ b/spacy/tests/doc/test_doc_api.py @@ -19,7 +19,7 @@ def test_doc_api_init(en_vocab): assert [t.is_sent_start for t in doc] == [True, False, True, False] # heads override sent_starts doc = Doc( - en_vocab, words=words, sent_starts=[True] * 4, heads=heads, deps=["dep"] * 4, + en_vocab, words=words, sent_starts=[True] * 4, heads=heads, deps=["dep"] * 4 ) assert [t.is_sent_start for t in doc] == [True, False, True, False] diff --git a/spacy/tests/lang/de/test_noun_chunks.py b/spacy/tests/lang/de/test_noun_chunks.py index 0ed12d208..7b8b15b1c 100644 --- a/spacy/tests/lang/de/test_noun_chunks.py +++ b/spacy/tests/lang/de/test_noun_chunks.py @@ -2,8 +2,7 @@ import pytest def test_noun_chunks_is_parsed_de(de_tokenizer): - """Test that noun_chunks raises Value Error for 'de' language if Doc is not parsed. - """ + """Test that noun_chunks raises Value Error for 'de' language if Doc is not parsed.""" doc = de_tokenizer("Er lag auf seinem") with pytest.raises(ValueError): list(doc.noun_chunks) diff --git a/spacy/tests/lang/el/test_noun_chunks.py b/spacy/tests/lang/el/test_noun_chunks.py index 2d376c612..2684a5cfb 100644 --- a/spacy/tests/lang/el/test_noun_chunks.py +++ b/spacy/tests/lang/el/test_noun_chunks.py @@ -2,8 +2,7 @@ import pytest def test_noun_chunks_is_parsed_el(el_tokenizer): - """Test that noun_chunks raises Value Error for 'el' language if Doc is not parsed. 
- """ + """Test that noun_chunks raises Value Error for 'el' language if Doc is not parsed.""" doc = el_tokenizer("είναι χώρα της νοτιοανατολικής") with pytest.raises(ValueError): list(doc.noun_chunks) diff --git a/spacy/tests/lang/en/test_noun_chunks.py b/spacy/tests/lang/en/test_noun_chunks.py index 0189a26d4..540f3ed84 100644 --- a/spacy/tests/lang/en/test_noun_chunks.py +++ b/spacy/tests/lang/en/test_noun_chunks.py @@ -7,8 +7,7 @@ import pytest def test_noun_chunks_is_parsed(en_tokenizer): - """Test that noun_chunks raises Value Error for 'en' language if Doc is not parsed. - """ + """Test that noun_chunks raises Value Error for 'en' language if Doc is not parsed.""" doc = en_tokenizer("This is a sentence") with pytest.raises(ValueError): list(doc.noun_chunks) diff --git a/spacy/tests/lang/es/test_noun_chunks.py b/spacy/tests/lang/es/test_noun_chunks.py index db89fd903..e5afd81c9 100644 --- a/spacy/tests/lang/es/test_noun_chunks.py +++ b/spacy/tests/lang/es/test_noun_chunks.py @@ -2,8 +2,7 @@ import pytest def test_noun_chunks_is_parsed_es(es_tokenizer): - """Test that noun_chunks raises Value Error for 'es' language if Doc is not parsed. - """ + """Test that noun_chunks raises Value Error for 'es' language if Doc is not parsed.""" doc = es_tokenizer("en Oxford este verano") with pytest.raises(ValueError): list(doc.noun_chunks) diff --git a/spacy/tests/lang/fa/test_noun_chunks.py b/spacy/tests/lang/fa/test_noun_chunks.py index 53b39d9a1..d2411e6d3 100644 --- a/spacy/tests/lang/fa/test_noun_chunks.py +++ b/spacy/tests/lang/fa/test_noun_chunks.py @@ -2,8 +2,7 @@ import pytest def test_noun_chunks_is_parsed_fa(fa_tokenizer): - """Test that noun_chunks raises Value Error for 'fa' language if Doc is not parsed. - """ + """Test that noun_chunks raises Value Error for 'fa' language if Doc is not parsed.""" doc = fa_tokenizer("این یک جمله نمونه می باشد.") with pytest.raises(ValueError): diff --git a/spacy/tests/lang/fr/test_exceptions.py b/spacy/tests/lang/fr/test_exceptions.py index 77e72a76b..d75c653d0 100644 --- a/spacy/tests/lang/fr/test_exceptions.py +++ b/spacy/tests/lang/fr/test_exceptions.py @@ -36,9 +36,7 @@ def test_fr_tokenizer_infix_exceptions(fr_tokenizer, text): assert len(tokens) == 1 -@pytest.mark.parametrize( - "text", ["janv.", "juill.", "Dr.", "av.", "sept."], -) +@pytest.mark.parametrize("text", ["janv.", "juill.", "Dr.", "av.", "sept."]) def test_fr_tokenizer_handles_abbr(fr_tokenizer, text): tokens = fr_tokenizer(text) assert len(tokens) == 1 diff --git a/spacy/tests/lang/fr/test_noun_chunks.py b/spacy/tests/lang/fr/test_noun_chunks.py index d81199a3e..48ac88ead 100644 --- a/spacy/tests/lang/fr/test_noun_chunks.py +++ b/spacy/tests/lang/fr/test_noun_chunks.py @@ -2,8 +2,7 @@ import pytest def test_noun_chunks_is_parsed_fr(fr_tokenizer): - """Test that noun_chunks raises Value Error for 'fr' language if Doc is not parsed. - """ + """Test that noun_chunks raises Value Error for 'fr' language if Doc is not parsed.""" doc = fr_tokenizer("trouver des travaux antérieurs") with pytest.raises(ValueError): list(doc.noun_chunks) diff --git a/spacy/tests/lang/id/test_noun_chunks.py b/spacy/tests/lang/id/test_noun_chunks.py index fef1524f1..a39456581 100644 --- a/spacy/tests/lang/id/test_noun_chunks.py +++ b/spacy/tests/lang/id/test_noun_chunks.py @@ -2,8 +2,7 @@ import pytest def test_noun_chunks_is_parsed_id(id_tokenizer): - """Test that noun_chunks raises Value Error for 'id' language if Doc is not parsed. 
- """ + """Test that noun_chunks raises Value Error for 'id' language if Doc is not parsed.""" doc = id_tokenizer("sebelas") with pytest.raises(ValueError): list(doc.noun_chunks) diff --git a/spacy/tests/lang/ja/test_tokenizer.py b/spacy/tests/lang/ja/test_tokenizer.py index e52741b70..c8c85d655 100644 --- a/spacy/tests/lang/ja/test_tokenizer.py +++ b/spacy/tests/lang/ja/test_tokenizer.py @@ -112,7 +112,7 @@ def test_ja_tokenizer_split_modes(ja_tokenizer, text, len_a, len_b, len_c): @pytest.mark.parametrize( - "text,sub_tokens_list_a,sub_tokens_list_b,sub_tokens_list_c", SUB_TOKEN_TESTS, + "text,sub_tokens_list_a,sub_tokens_list_b,sub_tokens_list_c", SUB_TOKEN_TESTS ) def test_ja_tokenizer_sub_tokens( ja_tokenizer, text, sub_tokens_list_a, sub_tokens_list_b, sub_tokens_list_c diff --git a/spacy/tests/lang/nb/test_noun_chunks.py b/spacy/tests/lang/nb/test_noun_chunks.py index 9965fcd14..dd259f2b7 100644 --- a/spacy/tests/lang/nb/test_noun_chunks.py +++ b/spacy/tests/lang/nb/test_noun_chunks.py @@ -2,8 +2,7 @@ import pytest def test_noun_chunks_is_parsed_nb(nb_tokenizer): - """Test that noun_chunks raises Value Error for 'nb' language if Doc is not parsed. - """ + """Test that noun_chunks raises Value Error for 'nb' language if Doc is not parsed.""" doc = nb_tokenizer("Smørsausen brukes bl.a. til") with pytest.raises(ValueError): list(doc.noun_chunks) diff --git a/spacy/tests/lang/ne/test_text.py b/spacy/tests/lang/ne/test_text.py index 7dd971132..e8a6c2e98 100644 --- a/spacy/tests/lang/ne/test_text.py +++ b/spacy/tests/lang/ne/test_text.py @@ -8,7 +8,7 @@ def test_ne_tokenizer_handlers_long_text(ne_tokenizer): @pytest.mark.parametrize( - "text,length", [("समय जान कति पनि बेर लाग्दैन ।", 7), ("म ठूलो हुँदै थिएँ ।", 5)], + "text,length", [("समय जान कति पनि बेर लाग्दैन ।", 7), ("म ठूलो हुँदै थिएँ ।", 5)] ) def test_ne_tokenizer_handles_cnts(ne_tokenizer, text, length): tokens = ne_tokenizer(text) diff --git a/spacy/tests/lang/sa/test_text.py b/spacy/tests/lang/sa/test_text.py index 41257a4d8..daa8d20c0 100644 --- a/spacy/tests/lang/sa/test_text.py +++ b/spacy/tests/lang/sa/test_text.py @@ -10,7 +10,7 @@ def test_sa_tokenizer_handles_long_text(sa_tokenizer): @pytest.mark.parametrize( "text,length", [ - ("श्री भगवानुवाच पश्य मे पार्थ रूपाणि शतशोऽथ सहस्रशः।", 9,), + ("श्री भगवानुवाच पश्य मे पार्थ रूपाणि शतशोऽथ सहस्रशः।", 9), ("गुणान् सर्वान् स्वभावो मूर्ध्नि वर्तते ।", 6), ], ) diff --git a/spacy/tests/lang/sv/test_noun_chunks.py b/spacy/tests/lang/sv/test_noun_chunks.py index 3791d8021..d2410156c 100644 --- a/spacy/tests/lang/sv/test_noun_chunks.py +++ b/spacy/tests/lang/sv/test_noun_chunks.py @@ -3,8 +3,7 @@ from spacy.tokens import Doc def test_noun_chunks_is_parsed_sv(sv_tokenizer): - """Test that noun_chunks raises Value Error for 'sv' language if Doc is not parsed. 
- """ + """Test that noun_chunks raises Value Error for 'sv' language if Doc is not parsed.""" doc = sv_tokenizer("Studenten läste den bästa boken") with pytest.raises(ValueError): list(doc.noun_chunks) diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index d5c8de36b..66de54c06 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -254,14 +254,12 @@ def test_vocab_serialization(nlp): mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1) # adding entities - q1_hash = mykb.add_entity(entity="Q1", freq=27, entity_vector=[1]) + mykb.add_entity(entity="Q1", freq=27, entity_vector=[1]) q2_hash = mykb.add_entity(entity="Q2", freq=12, entity_vector=[2]) - q3_hash = mykb.add_entity(entity="Q3", freq=5, entity_vector=[3]) + mykb.add_entity(entity="Q3", freq=5, entity_vector=[3]) # adding aliases - douglas_hash = mykb.add_alias( - alias="douglas", entities=["Q2", "Q3"], probabilities=[0.4, 0.1] - ) + mykb.add_alias(alias="douglas", entities=["Q2", "Q3"], probabilities=[0.4, 0.1]) adam_hash = mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9]) candidates = mykb.get_alias_candidates("adam") diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py index ff36bbda9..e0a785851 100644 --- a/spacy/tests/pipeline/test_textcat.py +++ b/spacy/tests/pipeline/test_textcat.py @@ -226,6 +226,7 @@ def test_positive_class_not_binary(): with pytest.raises(ValueError): verify_textcat_config(nlp, pipe_config) + def test_textcat_evaluation(): train_examples = [] nlp = English() @@ -241,15 +242,17 @@ def test_textcat_evaluation(): pred2.cats = {"winter": 1.0, "summer": 0.0, "spring": 0.0, "autumn": 1.0} train_examples.append(Example(pred2, ref2)) - scores = Scorer().score_cats(train_examples, "cats", labels=["winter", "summer", "spring", "autumn"]) - assert scores["cats_f_per_type"]["winter"]["p"] == 1/2 - assert scores["cats_f_per_type"]["winter"]["r"] == 1/1 + scores = Scorer().score_cats( + train_examples, "cats", labels=["winter", "summer", "spring", "autumn"] + ) + assert scores["cats_f_per_type"]["winter"]["p"] == 1 / 2 + assert scores["cats_f_per_type"]["winter"]["r"] == 1 / 1 assert scores["cats_f_per_type"]["summer"]["p"] == 0 - assert scores["cats_f_per_type"]["summer"]["r"] == 0/1 - assert scores["cats_f_per_type"]["spring"]["p"] == 1/1 - assert scores["cats_f_per_type"]["spring"]["r"] == 1/2 - assert scores["cats_f_per_type"]["autumn"]["p"] == 2/2 - assert scores["cats_f_per_type"]["autumn"]["r"] == 2/2 + assert scores["cats_f_per_type"]["summer"]["r"] == 0 / 1 + assert scores["cats_f_per_type"]["spring"]["p"] == 1 / 1 + assert scores["cats_f_per_type"]["spring"]["r"] == 1 / 2 + assert scores["cats_f_per_type"]["autumn"]["p"] == 2 / 2 + assert scores["cats_f_per_type"]["autumn"]["r"] == 2 / 2 - assert scores["cats_micro_p"] == 4/5 - assert scores["cats_micro_r"] == 4/6 + assert scores["cats_micro_p"] == 4 / 5 + assert scores["cats_micro_r"] == 4 / 6 diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py index f84b78247..06212e351 100644 --- a/spacy/tests/pipeline/test_tok2vec.py +++ b/spacy/tests/pipeline/test_tok2vec.py @@ -73,8 +73,7 @@ def test_tok2vec_configs(width, embed_arch, embed_config, encode_arch, encode_co encode_config["width"] = width docs = get_batch(3) tok2vec = build_Tok2Vec_model( - embed_arch(**embed_config), - encode_arch(**encode_config) + embed_arch(**embed_config), encode_arch(**encode_config) ) 
tok2vec.initialize(docs) vectors, backprop = tok2vec.begin_update(docs) diff --git a/spacy/tests/regression/test_issue3501-4000.py b/spacy/tests/regression/test_issue3501-4000.py index 31e441d86..0505571c2 100644 --- a/spacy/tests/regression/test_issue3501-4000.py +++ b/spacy/tests/regression/test_issue3501-4000.py @@ -229,9 +229,7 @@ def test_issue3611(): batches = minibatch(train_data, size=compounding(4.0, 32.0, 1.001)) for batch in batches: - nlp.update( - examples=batch, sgd=optimizer, drop=0.1, losses=losses, - ) + nlp.update(examples=batch, sgd=optimizer, drop=0.1, losses=losses) def test_issue3625(): @@ -390,7 +388,7 @@ def test_issue3959(): def test_issue3962(en_vocab): - """ Ensure that as_doc does not result in out-of-bound access of tokens. + """Ensure that as_doc does not result in out-of-bound access of tokens. This is achieved by setting the head to itself if it would lie out of the span otherwise.""" # fmt: off words = ["He", "jests", "at", "scars", ",", "that", "never", "felt", "a", "wound", "."] @@ -428,7 +426,7 @@ def test_issue3962(en_vocab): def test_issue3962_long(en_vocab): - """ Ensure that as_doc does not result in out-of-bound access of tokens. + """Ensure that as_doc does not result in out-of-bound access of tokens. This is achieved by setting the head to itself if it would lie out of the span otherwise.""" # fmt: off words = ["He", "jests", "at", "scars", ".", "They", "never", "felt", "a", "wound", "."] @@ -463,8 +461,7 @@ def test_issue3962_long(en_vocab): def test_issue3972(en_vocab): - """Test that the PhraseMatcher returns duplicates for duplicate match IDs. - """ + """Test that the PhraseMatcher returns duplicates for duplicate match IDs.""" matcher = PhraseMatcher(en_vocab) matcher.add("A", [Doc(en_vocab, words=["New", "York"])]) matcher.add("B", [Doc(en_vocab, words=["New", "York"])]) diff --git a/spacy/tests/regression/test_issue4001-4500.py b/spacy/tests/regression/test_issue4001-4500.py index 753cff37f..0e2579ac4 100644 --- a/spacy/tests/regression/test_issue4001-4500.py +++ b/spacy/tests/regression/test_issue4001-4500.py @@ -19,8 +19,7 @@ from ..util import make_tempdir def test_issue4002(en_vocab): - """Test that the PhraseMatcher can match on overwritten NORM attributes. 
- """ + """Test that the PhraseMatcher can match on overwritten NORM attributes.""" matcher = PhraseMatcher(en_vocab, attr="NORM") pattern1 = Doc(en_vocab, words=["c", "d"]) assert [t.norm_ for t in pattern1] == ["c", "d"] @@ -72,9 +71,7 @@ def test_issue4030(): batches = minibatch(train_data, size=compounding(4.0, 32.0, 1.001)) for batch in batches: - nlp.update( - examples=batch, sgd=optimizer, drop=0.1, losses=losses, - ) + nlp.update(examples=batch, sgd=optimizer, drop=0.1, losses=losses) # processing of an empty doc should result in 0.0 for all categories doc = nlp("") assert doc.cats["offensive"] == 0.0 diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py index ee103208c..bba71d6da 100644 --- a/spacy/tests/test_cli.py +++ b/spacy/tests/test_cli.py @@ -7,7 +7,7 @@ from spacy.cli.init_config import init_config, RECOMMENDATIONS from spacy.cli._util import validate_project_commands, parse_config_overrides from spacy.cli._util import load_project_config, substitute_project_variables from spacy.cli._util import string_to_list, OVERRIDES_ENV_VAR -from thinc.api import ConfigValidationError, Config +from thinc.api import ConfigValidationError import srsly import os diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py index 6a487303e..917e7552e 100644 --- a/spacy/tests/test_language.py +++ b/spacy/tests/test_language.py @@ -290,9 +290,7 @@ def test_spacy_blank(): assert nlp.meta["name"] == "my_custom_model" -@pytest.mark.parametrize( - "value", [False, None, ["x", "y"], Language, Vocab], -) +@pytest.mark.parametrize("value", [False, None, ["x", "y"], Language, Vocab]) def test_language_init_invalid_vocab(value): err_fragment = "invalid value" with pytest.raises(ValueError) as e: diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py index 8f1bb1c3d..a123f459d 100644 --- a/spacy/tests/test_models.py +++ b/spacy/tests/test_models.py @@ -64,7 +64,7 @@ def get_tok2vec_kwargs(): width=32, rows=500, also_embed_subwords=True, also_use_static_vectors=False ), "encode": MaxoutWindowEncoder( - width=32, depth=2, maxout_pieces=2, window_size=1, + width=32, depth=2, maxout_pieces=2, window_size=1 ), } diff --git a/spacy/tests/test_scorer.py b/spacy/tests/test_scorer.py index 2825f1703..89864d579 100644 --- a/spacy/tests/test_scorer.py +++ b/spacy/tests/test_scorer.py @@ -137,7 +137,7 @@ def test_las_per_type(en_vocab): examples = [] for input_, annot in test_las_apple: doc = Doc( - en_vocab, words=input_.split(" "), heads=annot["heads"], deps=annot["deps"], + en_vocab, words=input_.split(" "), heads=annot["heads"], deps=annot["deps"] ) gold = {"heads": annot["heads"], "deps": annot["deps"]} example = Example.from_dict(doc, gold) diff --git a/spacy/tests/training/test_training.py b/spacy/tests/training/test_training.py index 454f412e1..81e533a5a 100644 --- a/spacy/tests/training/test_training.py +++ b/spacy/tests/training/test_training.py @@ -496,8 +496,10 @@ def test_make_orth_variants(doc): output_file = tmpdir / "roundtrip.spacy" DocBin(docs=[doc]).to_disk(output_file) # due to randomness, test only that this runs with no errors for now - reader = Corpus(output_file, augmenter=create_orth_variants_augmenter(level=0.2, lower=0.5)) - train_examples = list(reader(nlp)) + reader = Corpus( + output_file, augmenter=create_orth_variants_augmenter(level=0.2, lower=0.5) + ) + list(reader(nlp)) @pytest.mark.skip("Outdated") diff --git a/spacy/training/augment.py b/spacy/training/augment.py index 4d487ce93..1756144e6 100644 --- a/spacy/training/augment.py +++ 
b/spacy/training/augment.py @@ -23,7 +23,7 @@ def dont_augment(nlp, example): yield example -def orth_variants_augmenter(nlp, example, *, level: float = 0.0, lower: float=0.0): +def orth_variants_augmenter(nlp, example, *, level: float = 0.0, lower: float = 0.0): if random.random() >= level: yield example else: @@ -36,14 +36,14 @@ def orth_variants_augmenter(nlp, example, *, level: float = 0.0, lower: float=0. nlp, raw_text, orig_dict["token_annotation"], - lower=raw_text is not None and random.random() < lower + lower=raw_text is not None and random.random() < lower, ) doc = nlp.make_doc(variant_text) orig_dict["token_annotation"] = variant_token_annot yield example.from_dict(doc, orig_dict) -def make_orth_variants(nlp, raw, token_dict, *, lower: bool=False): +def make_orth_variants(nlp, raw, token_dict, *, lower: bool = False): orig_token_dict = copy.deepcopy(token_dict) orth_variants = nlp.vocab.lookups.get_table("orth_variants", {}) ndsv = orth_variants.get("single", []) diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py index 09ac2b0ac..267b77f05 100644 --- a/spacy/training/initialize.py +++ b/spacy/training/initialize.py @@ -188,8 +188,8 @@ def verify_textcat_config(nlp: "Language", pipe_config: Dict[str, Any]) -> None: def get_sourced_components(config: Union[Dict[str, Any], Config]) -> List[str]: """RETURNS (List[str]): All sourced components in the original config, - e.g. {"source": "en_core_web_sm"}. If the config contains a key - "factory", we assume it refers to a component factory. + e.g. {"source": "en_core_web_sm"}. If the config contains a key + "factory", we assume it refers to a component factory. """ return [ name diff --git a/spacy/training/pretrain.py b/spacy/training/pretrain.py index 5e136cdf1..4f05c6344 100644 --- a/spacy/training/pretrain.py +++ b/spacy/training/pretrain.py @@ -94,7 +94,7 @@ def ensure_docs(examples_or_docs: Iterable[Union[Doc, Example]]) -> List[Doc]: def _resume_model( - model: Model, resume_path: Path, epoch_resume: int, silent: bool = True, + model: Model, resume_path: Path, epoch_resume: int, silent: bool = True ) -> None: msg = Printer(no_print=silent) msg.info(f"Resume training tok2vec from: {resume_path}") diff --git a/spacy/util.py b/spacy/util.py index 2584d4752..2c33d737e 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -488,7 +488,7 @@ def load_config_from_str( RETURNS (Config): The loaded config. """ return Config(section_order=CONFIG_SECTION_ORDER).from_str( - text, overrides=overrides, interpolate=interpolate, + text, overrides=overrides, interpolate=interpolate )
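
Note (not part of the commit): the spacy/scorer.py hunk above reformats PRFScore.__add__, which combines per-class tp/fp/fn counts, and the test_textcat.py hunk asserts the micro-averaged scores that fall out of exactly that summation. Below is a minimal standalone sketch of the arithmetic; the PRF class is a simplified stand-in for spaCy's PRFScore, and the epsilon guard against empty denominators is an assumption, not taken from the patch.

    from dataclasses import dataclass

    @dataclass
    class PRF:
        tp: int = 0
        fp: int = 0
        fn: int = 0

        def __add__(self, other: "PRF") -> "PRF":
            # Mirrors the reformatted PRFScore.__add__: combine raw counts.
            return PRF(self.tp + other.tp, self.fp + other.fp, self.fn + other.fn)

        @property
        def precision(self) -> float:
            return self.tp / (self.tp + self.fp + 1e-100)

        @property
        def recall(self) -> float:
            return self.tp / (self.tp + self.fn + 1e-100)

    # Per-class counts implied by the assertions in test_textcat_evaluation:
    winter = PRF(tp=1, fp=1, fn=0)  # p = 1/2, r = 1/1
    summer = PRF(tp=0, fp=0, fn=1)  # p = 0,   r = 0/1
    spring = PRF(tp=1, fp=0, fn=1)  # p = 1/1, r = 1/2
    autumn = PRF(tp=2, fp=0, fn=0)  # p = 2/2, r = 2/2

    # Summing counts first, then scoring, yields the micro averages:
    micro = winter + summer + spring + autumn
    assert abs(micro.precision - 4 / 5) < 1e-9  # cats_micro_p
    assert abs(micro.recall - 4 / 6) < 1e-9     # cats_micro_r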
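
Note (not part of the commit): the get_sourced_components docstring reformatted in the spacy/training/initialize.py hunk distinguishes components carrying a "source" key (copied from another pipeline) from those carrying a "factory" key (constructed locally). A minimal sketch of that distinction follows; the component names and the filtering expression are hypothetical illustrations, and only the "source"-vs-"factory" contrast comes from the docstring.

    # Sketch: telling sourced components apart from factory-built ones in a
    # spaCy v3-style config. Component names ("ner", "textcat") are made up.
    from thinc.api import Config

    CONFIG = """
    [components]

    [components.ner]
    source = "en_core_web_sm"

    [components.textcat]
    factory = "textcat"
    """

    cfg = Config().from_str(CONFIG)
    # Simplified filter: a sourced component is one declaring a "source" key.
    sourced = [name for name, c in cfg["components"].items() if "source" in c]
    assert sourced == ["ner"]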