diff --git a/fabfile.py b/fabfile.py
index 460471747..760c2c0e2 100644
--- a/fabfile.py
+++ b/fabfile.py
@@ -1,6 +1,6 @@
 import contextlib
 from pathlib import Path
-from fabric.api import local, lcd, env, settings, prefix
+from fabric.api import local, lcd
 from os import path, environ
 import shutil
 import sys
@@ -79,9 +79,7 @@ def pex():
     with virtualenv(VENV_DIR) as venv_local:
         with lcd(path.dirname(__file__)):
             sha = local("git rev-parse --short HEAD", capture=True)
-            venv_local(
-                "pex dist/*.whl -e spacy -o dist/spacy-%s.pex" % sha, direct=True
-            )
+            venv_local(f"pex dist/*.whl -e spacy -o dist/spacy-{sha}.pex", direct=True)


 def clean():
diff --git a/spacy/__main__.py b/spacy/__main__.py
index 06ba5704d..05e3d5e02 100644
--- a/spacy/__main__.py
+++ b/spacy/__main__.py
@@ -22,7 +22,7 @@ if __name__ == "__main__":
     if len(sys.argv) == 1:
         msg.info("Available commands", ", ".join(commands), exits=1)
     command = sys.argv.pop(1)
-    sys.argv[0] = "spacy %s" % command
+    sys.argv[0] = f"spacy {command}"
     if command in commands:
         plac.call(commands[command], sys.argv[1:])
     else:
diff --git a/spacy/_ml.py b/spacy/_ml.py
index a1d2b6b77..37cfff0b7 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -296,7 +296,7 @@ def link_vectors_to_models(vocab):
         # This is a hack to avoid the problem in #3853. Maybe we should
         # print a warning as well?
         old_name = vectors.name
-        new_name = vectors.name + "_%d" % data.shape[0]
+        new_name = f"{vectors.name}_{data.shape[0]}"
         user_warning(Warnings.W019.format(old=old_name, new=new_name))
         vectors.name = new_name
         key = (ops.device, vectors.name)
diff --git a/spacy/cli/evaluate.py b/spacy/cli/evaluate.py
index de2cb4d09..a6b730d65 100644
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@@ -47,20 +47,20 @@ def evaluate(
     end = timer()
     nwords = sum(len(ex.doc) for ex in dev_dataset)
     results = {
-        "Time": "%.2f s" % (end - begin),
+        "Time": f"{end - begin:.2f} s",
         "Words": nwords,
-        "Words/s": "%.0f" % (nwords / (end - begin)),
-        "TOK": "%.2f" % scorer.token_acc,
-        "POS": "%.2f" % scorer.tags_acc,
-        "UAS": "%.2f" % scorer.uas,
-        "LAS": "%.2f" % scorer.las,
-        "NER P": "%.2f" % scorer.ents_p,
-        "NER R": "%.2f" % scorer.ents_r,
-        "NER F": "%.2f" % scorer.ents_f,
-        "Textcat": "%.2f" % scorer.textcat_score,
-        "Sent P": "%.2f" % scorer.sent_p,
-        "Sent R": "%.2f" % scorer.sent_r,
-        "Sent F": "%.2f" % scorer.sent_f,
+        "Words/s": f"{nwords / (end - begin):.0f}",
+        "TOK": f"{scorer.token_acc:.2f}",
+        "POS": f"{scorer.tags_acc:.2f}",
+        "UAS": f"{scorer.uas:.2f}",
+        "LAS": f"{scorer.las:.2f}",
+        "NER P": f"{scorer.ents_p:.2f}",
+        "NER R": f"{scorer.ents_r:.2f}",
+        "NER F": f"{scorer.ents_f:.2f}",
+        "Textcat": f"{scorer.textcat_score:.2f}",
+        "Sent P": f"{scorer.sent_p:.2f}",
+        "Sent R": f"{scorer.sent_r:.2f}",
+        "Sent F": f"{scorer.sent_f:.2f}",
     }
     msg.table(results, title="Results")
diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py
index c3ef5267c..87583ba73 100644
--- a/spacy/cli/init_model.py
+++ b/spacy/cli/init_model.py
@@ -186,7 +186,7 @@ def add_vectors(nlp, vectors_loc, prune_vectors, name=None):
     if vectors_data is not None:
         nlp.vocab.vectors = Vectors(data=vectors_data, keys=vector_keys)
     if name is None:
-        nlp.vocab.vectors.name = "%s_model.vectors" % nlp.meta["lang"]
+        nlp.vocab.vectors.name = f"{nlp.meta['lang']}_model.vectors"
     else:
         nlp.vocab.vectors.name = name
     nlp.meta["vectors"]["name"] = nlp.vocab.vectors.name
@@ -232,7 +232,7 @@ def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50):
             word = literal_eval(key)
         except SyntaxError:
             # Take odd strings literally.
-            word = literal_eval("'%s'" % key)
+            word = literal_eval(f"'{key}'")
         smooth_count = counts.smoother(int(freq))
         probs[word] = math.log(smooth_count) - log_total
     oov_prob = math.log(counts.smoother(0)) - log_total
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 8830a0ca2..edd9117c5 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -83,7 +83,7 @@ def generate_meta(model_path, existing_meta, msg):
         ("lang", "Model language", meta.get("lang", "en")),
         ("name", "Model name", meta.get("name", "model")),
         ("version", "Model version", meta.get("version", "0.0.0")),
-        ("spacy_version", "Required spaCy version", ">=%s,<3.0.0" % about.__version__),
+        ("spacy_version", "Required spaCy version", f">={about.__version__},<3.0.0"),
         ("description", "Model description", meta.get("description", False)),
         ("author", "Author", meta.get("author", False)),
         ("email", "Author email", meta.get("email", False)),
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 75840923e..12aa8b5c2 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -179,14 +179,12 @@ def pretrain(
         else:
             if not epoch_start:
                 msg.fail(
-                    "You have to use the '--epoch-start' argument when using a renamed weight file for "
-                    "'--init-tok2vec'",
+                    "You have to use the --epoch-start argument when using a renamed weight file for --init-tok2vec",
                     exits=True,
                 )
             elif epoch_start < 0:
                 msg.fail(
-                    "The argument '--epoch-start' has to be greater or equal to 0. '%d' is invalid"
-                    % epoch_start,
+                    f"The argument --epoch-start has to be greater or equal to 0. {epoch_start} is invalid",
                     exits=True,
                 )
     else:
@@ -195,16 +193,14 @@
     optimizer = create_default_optimizer(model.ops)
     tracker = ProgressTracker(frequency=10000)
-    msg.divider("Pre-training tok2vec layer - starting at epoch %d" % epoch_start)
+    msg.divider(f"Pre-training tok2vec layer - starting at epoch {epoch_start}")
     row_settings = {"widths": (3, 10, 10, 6, 4), "aligns": ("r", "r", "r", "r", "r")}
     msg.row(("#", "# Words", "Total Loss", "Loss", "w/s"), **row_settings)

     def _save_model(epoch, is_temp=False):
         is_temp_str = ".temp" if is_temp else ""
         with model.use_params(optimizer.averages):
-            with (output_dir / ("model%d%s.bin" % (epoch, is_temp_str))).open(
-                "wb"
-            ) as file_:
+            with (output_dir / f"model{epoch}{is_temp_str}.bin").open("wb") as file_:
                 file_.write(model.tok2vec.to_bytes())
             log = {
                 "nr_word": tracker.nr_word,
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index e8662a101..df5456df3 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -375,7 +375,7 @@ def train(
                 words_seen += sum(len(doc) for doc in docs)
             with nlp.use_params(optimizer.averages):
                 util.set_env_log(False)
-                epoch_model_path = output_path / ("model%d" % i)
+                epoch_model_path = output_path / f"model{i}"
                 nlp.to_disk(epoch_model_path)
                 nlp_loaded = util.load_model_from_path(epoch_model_path)
                 for beam_width in eval_beam_widths:
@@ -414,13 +414,13 @@ def train(
                     scorer = nlp_loaded.evaluate(dev_dataset, verbose=verbose)
                     end_time = timer()
                     cpu_wps = nwords / (end_time - start_time)
-                    acc_loc = output_path / ("model%d" % i) / "accuracy.json"
+                    acc_loc = output_path / f"model{i}" / "accuracy.json"
                     srsly.write_json(acc_loc, scorer.scores)

                     # Update model meta.json
                     meta["lang"] = nlp.lang
                     meta["pipeline"] = nlp.pipe_names
-                    meta["spacy_version"] = ">=%s" % about.__version__
+                    meta["spacy_version"] = f">={about.__version__}"
                     if beam_width == 1:
                         meta["speed"] = {
                             "nwords": nwords,
@@ -443,10 +443,10 @@ def train(
                         "keys": nlp.vocab.vectors.n_keys,
                         "name": nlp.vocab.vectors.name,
                     }
-                    meta.setdefault("name", "model%d" % i)
+                    meta.setdefault("name", f"model{i}")
                     meta.setdefault("version", version)
                     meta["labels"] = nlp.meta["labels"]
-                    meta_loc = output_path / ("model%d" % i) / "meta.json"
+                    meta_loc = output_path / f"model{i}" / "meta.json"
                     srsly.write_json(meta_loc, meta)
                     util.set_env_log(verbose)
diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index e3af40d4d..1d3d8e034 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -615,7 +615,7 @@ def _consume_ent(tags):
     else:
         start = "B-" + label
         end = "L-" + label
-        middle = ["I-%s" % label for _ in range(1, length - 1)]
+        middle = [f"I-{label}" for _ in range(1, length - 1)]
         return [start] + middle + [end]


@@ -1204,12 +1204,12 @@ def biluo_tags_from_offsets(doc, entities, missing="O"):
         # Only interested if the tokenization is correct
         if start_token is not None and end_token is not None:
             if start_token == end_token:
-                biluo[start_token] = "U-%s" % label
+                biluo[start_token] = f"U-{label}"
             else:
-                biluo[start_token] = "B-%s" % label
+                biluo[start_token] = f"B-{label}"
                 for i in range(start_token+1, end_token):
-                    biluo[i] = "I-%s" % label
-                biluo[end_token] = "L-%s" % label
+                    biluo[i] = f"I-{label}"
+                biluo[end_token] = f"L-{label}"
     # Now distinguish the O cases from ones where we miss the tokenization
     entity_chars = set()
     for start_char, end_char, label in entities:
diff --git a/spacy/kb.pyx b/spacy/kb.pyx
index 1129fa860..64fbb1e29 100644
--- a/spacy/kb.pyx
+++ b/spacy/kb.pyx
@@ -442,7 +442,7 @@ cdef class KnowledgeBase:
 cdef class Writer:
     def __init__(self, object loc):
         if path.exists(loc):
-            assert not path.isdir(loc), "%s is directory." % loc
+            assert not path.isdir(loc), f"{loc} is directory"
         if isinstance(loc, Path):
             loc = bytes(loc)
         cdef bytes bytes_loc = loc.encode('utf8') if type(loc) == unicode else loc
diff --git a/spacy/lang/ca/tokenizer_exceptions.py b/spacy/lang/ca/tokenizer_exceptions.py
index 5a9d9055a..b4ae61a2d 100644
--- a/spacy/lang/ca/tokenizer_exceptions.py
+++ b/spacy/lang/ca/tokenizer_exceptions.py
@@ -30,9 +30,9 @@ _exc["12m."] = [{ORTH: "12"}, {ORTH: "m.", LEMMA: "p.m."}]

 for h in range(1, 12 + 1):
     for period in ["a.m.", "am"]:
-        _exc["%d%s" % (h, period)] = [{ORTH: "%d" % h}, {ORTH: period, LEMMA: "a.m."}]
+        _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period, LEMMA: "a.m."}]
     for period in ["p.m.", "pm"]:
-        _exc["%d%s" % (h, period)] = [{ORTH: "%d" % h}, {ORTH: period, LEMMA: "p.m."}]
+        _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period, LEMMA: "p.m."}]


 TOKENIZER_EXCEPTIONS = _exc
diff --git a/spacy/lang/da/tokenizer_exceptions.py b/spacy/lang/da/tokenizer_exceptions.py
index 64eba819f..c8ea9cbf5 100644
--- a/spacy/lang/da/tokenizer_exceptions.py
+++ b/spacy/lang/da/tokenizer_exceptions.py
@@ -559,7 +559,7 @@ for exc_data in [
 # Dates
 for h in range(1, 31 + 1):
     for period in ["."]:
-        _exc["%d%s" % (h, period)] = [{ORTH: "%d." % h}]
+        _exc[f"{h}{period}"] = [{ORTH: f"{h}."}]

 _custom_base_exc = {"i.": [{ORTH: "i", LEMMA: "i", NORM: "i"}, {ORTH: ".", TAG: PUNCT}]}
 _exc.update(_custom_base_exc)
diff --git a/spacy/lang/el/tokenizer_exceptions.py b/spacy/lang/el/tokenizer_exceptions.py
index 27ae1fe3a..112fd991b 100644
--- a/spacy/lang/el/tokenizer_exceptions.py
+++ b/spacy/lang/el/tokenizer_exceptions.py
@@ -131,14 +131,14 @@ _exc.update(_other_exc)

 for h in range(1, 12 + 1):
     for period in ["π.μ.", "πμ"]:
-        _exc["%d%s" % (h, period)] = [
-            {ORTH: "%d" % h},
+        _exc[f"{h}{period}"] = [
+            {ORTH: f"{h}"},
             {ORTH: period, LEMMA: "π.μ.", NORM: "π.μ."},
         ]
     for period in ["μ.μ.", "μμ"]:
-        _exc["%d%s" % (h, period)] = [
-            {ORTH: "%d" % h},
+        _exc[f"{h}{period}"] = [
+            {ORTH: f"{h}"},
             {ORTH: period, LEMMA: "μ.μ.", NORM: "μ.μ."},
         ]
diff --git a/spacy/lang/en/tokenizer_exceptions.py b/spacy/lang/en/tokenizer_exceptions.py
index 776948c28..3e8075ec4 100644
--- a/spacy/lang/en/tokenizer_exceptions.py
+++ b/spacy/lang/en/tokenizer_exceptions.py
@@ -328,13 +328,13 @@ for exc_data in [

 for h in range(1, 12 + 1):
     for period in ["a.m.", "am"]:
-        _exc["%d%s" % (h, period)] = [
-            {ORTH: "%d" % h},
+        _exc[f"{h}{period}"] = [
+            {ORTH: f"{h}"},
             {ORTH: period, LEMMA: "a.m.", NORM: "a.m."},
         ]
     for period in ["p.m.", "pm"]:
-        _exc["%d%s" % (h, period)] = [
-            {ORTH: "%d" % h},
+        _exc[f"{h}{period}"] = [
+            {ORTH: f"{h}"},
             {ORTH: period, LEMMA: "p.m.", NORM: "p.m."},
         ]
diff --git a/spacy/lang/es/tokenizer_exceptions.py b/spacy/lang/es/tokenizer_exceptions.py
index 1cd5941be..5c7fcb15d 100644
--- a/spacy/lang/es/tokenizer_exceptions.py
+++ b/spacy/lang/es/tokenizer_exceptions.py
@@ -28,9 +28,9 @@ _exc["12m."] = [{ORTH: "12"}, {ORTH: "m.", LEMMA: "p.m."}]

 for h in range(1, 12 + 1):
     for period in ["a.m.", "am"]:
-        _exc["%d%s" % (h, period)] = [{ORTH: "%d" % h}, {ORTH: period, LEMMA: "a.m."}]
+        _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period, LEMMA: "a.m."}]
     for period in ["p.m.", "pm"]:
-        _exc["%d%s" % (h, period)] = [{ORTH: "%d" % h}, {ORTH: period, LEMMA: "p.m."}]
+        _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period, LEMMA: "p.m."}]


 for orth in [
diff --git a/spacy/lang/fr/tokenizer_exceptions.py b/spacy/lang/fr/tokenizer_exceptions.py
index b1c0a53af..4e2e7fb18 100644
--- a/spacy/lang/fr/tokenizer_exceptions.py
+++ b/spacy/lang/fr/tokenizer_exceptions.py
@@ -85,7 +85,7 @@ for verb, verb_lemma in [("est", "être")]:

 for pre, pre_lemma in [("qu'", "que"), ("n'", "ne")]:
     for orth in [pre, pre.title()]:
-        _exc["%sest-ce" % orth] = [
+        _exc[f"{orth}est-ce"] = [
             {LEMMA: pre_lemma, ORTH: orth, TAG: "ADV"},
             {LEMMA: "être", ORTH: "est", TAG: "VERB"},
             {LEMMA: "ce", ORTH: "-ce"},
diff --git a/spacy/language.py b/spacy/language.py
index 4a553bcaf..4ae729588 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1015,7 +1015,7 @@ def _fix_pretrained_vectors_name(nlp):
     elif not nlp.vocab.vectors.size:
         nlp.vocab.vectors.name = None
     elif "name" in nlp.meta and "lang" in nlp.meta:
-        vectors_name = "%s_%s.vectors" % (nlp.meta["lang"], nlp.meta["name"])
+        vectors_name = f"{nlp.meta['lang']}_{nlp.meta['name']}.vectors"
         nlp.vocab.vectors.name = vectors_name
     else:
         raise ValueError(Errors.E092)
diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx
index f12691170..8030a9a28 100644
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -72,7 +72,7 @@ def _normalize_props(props):
             # just take the first one :(
             if "|" in value:
                 value = value.split("|")[0]
-            attr = '%s_%s' % (key, value)
+            attr = f"{key}_{value}"
             if attr in FEATURES:
                 props.pop(key)
                 props[attr] = True
diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx
index ff88340cd..5ca651077 100644
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -985,14 +985,14 @@ class MultitaskObjective(Tagger):
         offset = token_annotation.heads[i] - i
         offset = min(offset, 2)
         offset = max(offset, -2)
-        return "%s-%s:%d" % (token_annotation.deps[i], token_annotation.tags[i], offset)
+        return f"{token_annotation.deps[i]}-{token_annotation.tags[i]}:{offset}"

     @staticmethod
     def make_ent_tag(i, token_annotation):
         if token_annotation.entities is None or token_annotation.entities[i] is None:
             return None
         else:
-            return "%s-%s" % (token_annotation.tags[i], token_annotation.entities[i])
+            return f"{token_annotation.tags[i]}-{token_annotation.entities[i]}"

     @staticmethod
     def make_sent_start(target, token_annotation, cache=True, _cache={}):
diff --git a/spacy/syntax/nonproj.pyx b/spacy/syntax/nonproj.pyx
index 0f738f99f..f024c1f05 100644
--- a/spacy/syntax/nonproj.pyx
+++ b/spacy/syntax/nonproj.pyx
@@ -154,8 +154,7 @@ def _decorate(heads, proj_heads, labels):
     deco_labels = []
     for tokenid, head in enumerate(heads):
         if head != proj_heads[tokenid]:
-            deco_labels.append(
-                '%s%s%s' % (labels[tokenid], DELIMITER, labels[head]))
+            deco_labels.append(f"{labels[tokenid]}{DELIMITER}{labels[head]}")
         else:
             deco_labels.append(labels[tokenid])
     return deco_labels
diff --git a/spacy/syntax/stateclass.pyx b/spacy/syntax/stateclass.pyx
index 47b37946c..e472e9861 100644
--- a/spacy/syntax/stateclass.pyx
+++ b/spacy/syntax/stateclass.pyx
@@ -46,9 +46,9 @@ cdef class StateClass:

     def print_state(self, words):
         words = list(words) + ['_']
-        top = words[self.S(0)] + '_%d' % self.S_(0).head
-        second = words[self.S(1)] + '_%d' % self.S_(1).head
-        third = words[self.S(2)] + '_%d' % self.S_(2).head
+        top = f"{words[self.S(0)]}_{self.S_(0).head}"
+        second = f"{words[self.S(1)]}_{self.S_(1).head}"
+        third = f"{words[self.S(2)]}_{self.S_(2).head}"
         n0 = words[self.B(0)]
         n1 = words[self.B(1)]
         return ' '.join((third, second, top, '|', n0, n1))
diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index ba7b67e25..b391dd88e 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -14,11 +14,11 @@ def pytest_runtest_setup(item):
         # recognize the option we're asking about. To avoid this, we need to
         # pass a default value. We default to False, i.e., we act like all the
         # options weren't given.
-        return item.config.getoption("--%s" % opt, False)
+        return item.config.getoption(f"--{opt}", False)

     for opt in ["slow"]:
         if opt in item.keywords and not getopt(opt):
-            pytest.skip("need --%s option to run" % opt)
+            pytest.skip(f"need --{opt} option to run")


 # Fixtures for language tokenizers (languages sorted alphabetically)
diff --git a/spacy/tests/pipeline/test_pipe_methods.py b/spacy/tests/pipeline/test_pipe_methods.py
index 3ec8b508d..e2fb02a2a 100644
--- a/spacy/tests/pipeline/test_pipe_methods.py
+++ b/spacy/tests/pipeline/test_pipe_methods.py
@@ -115,7 +115,7 @@ def test_disable_pipes_list_arg(nlp):


 @pytest.mark.parametrize("n_pipes", [100])
 def test_add_lots_of_pipes(nlp, n_pipes):
     for i in range(n_pipes):
-        nlp.add_pipe(lambda doc: doc, name="pipe_%d" % i)
+        nlp.add_pipe(lambda doc: doc, name=f"pipe_{i}")
     assert len(nlp.pipe_names) == n_pipes
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 58423c420..7e6473d56 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -499,7 +499,7 @@ cdef class Doc:
             token = &self.c[i]
             if token.ent_iob == 1:
                 if start == -1:
-                    seq = ["%s|%s" % (t.text, t.ent_iob_) for t in self[i-5:i+5]]
+                    seq = [f"{t.text}|{t.ent_iob_}" for t in self[i-5:i+5]]
                     raise ValueError(Errors.E093.format(seq=" ".join(seq)))
             elif token.ent_iob == 2 or token.ent_iob == 0:
                 if start != -1:
diff --git a/spacy/util.py b/spacy/util.py
index 57bbee69f..55e197eb2 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -62,7 +62,7 @@ def get_lang_class(lang):
         return registry.languages.get(lang)
     else:
         try:
-            module = importlib.import_module(".lang.%s" % lang, "spacy")
+            module = importlib.import_module(f".lang.{lang}", "spacy")
         except ImportError as err:
             raise ImportError(Errors.E048.format(lang=lang, err=err))
         set_lang_class(lang, getattr(module, module.__all__[0]))
@@ -212,7 +212,7 @@ def load_model_from_init_py(init_file, **overrides):
     """
     model_path = Path(init_file).parent
     meta = get_model_meta(model_path)
-    data_dir = "%s_%s-%s" % (meta["lang"], meta["name"], meta["version"])
+    data_dir = f"{meta['lang']}_{meta['name']}-{meta['version']}"
     data_path = model_path / data_dir
     if not model_path.exists():
         raise IOError(Errors.E052.format(path=data_path))