small fixes

svlandeg 2019-06-24 10:55:04 +02:00
parent b76a43bee4
commit b58bace84b
5 changed files with 21 additions and 22 deletions

View File

@@ -292,8 +292,8 @@ def evaluate(gold_ud, system_ud, deprel_weights=None, check_parse=True):
     def spans_score(gold_spans, system_spans):
         correct, gi, si = 0, 0, 0
-        undersegmented = list()
-        oversegmented = list()
+        undersegmented = []
+        oversegmented = []
         combo = 0
         previous_end_si_earlier = False
         previous_end_gi_earlier = False
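Nearly every hunk in this commit is the same mechanical swap of the list() constructor for a list literal. As a quick aside (not part of the diff), the literal is the idiomatic spelling and is also marginally cheaper; the disassembly makes the difference visible:

import dis

# The literal compiles to a single BUILD_LIST opcode (exact opcode names
# vary slightly across CPython versions)...
dis.dis(compile("[]", "<demo>", "eval"))

# ...while list() needs a global name lookup plus a call, and can even be
# shadowed by a local variable named "list".
dis.dis(compile("list()", "<demo>", "eval"))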

View File

@@ -42,9 +42,9 @@ def create_kb(nlp, max_entities_per_alias, min_entity_freq, min_occ,
     # filter the entities for in the KB by frequency, because there's just too much data (8M entities) otherwise
     filtered_title_to_id = dict()
-    entity_list = list()
-    description_list = list()
-    frequency_list = list()
+    entity_list = []
+    description_list = []
+    frequency_list = []
     for title, entity in title_to_id.items():
        freq = entity_frequencies.get(title, 0)
        desc = id_to_descr.get(entity, None)
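For context, here is the filtering loop this hunk belongs to, reconstructed as a standalone sketch. The lines beyond the hunk are assumptions: in particular the exact threshold test on freq and the requirement that a description exist are inferred from the surrounding names, not shown in the diff.

def filter_entities(title_to_id, id_to_descr, entity_frequencies, min_entity_freq):
    # Keep only entities that occur often enough and have a description,
    # collecting the parallel lists that feed KB construction.
    filtered_title_to_id = {}
    entity_list = []
    description_list = []
    frequency_list = []
    for title, entity in title_to_id.items():
        freq = entity_frequencies.get(title, 0)
        desc = id_to_descr.get(entity, None)
        if desc is not None and freq >= min_entity_freq:  # assumed condition
            filtered_title_to_id[title] = entity
            entity_list.append(entity)
            description_list.append(desc)
            frequency_list.append(freq)
    return filtered_title_to_id, entity_list, description_list, frequency_list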
@@ -131,8 +131,8 @@ def _add_aliases(kb, title_to_id, max_entities_per_alias, min_occ, prior_prob_in
     line = prior_file.readline()
     previous_alias = None
     total_count = 0
-    counts = list()
-    entities = list()
+    counts = []
+    entities = []
     while line:
         splits = line.replace('\n', "").split(sep='|')
         new_alias = splits[0]
@@ -142,8 +142,8 @@ def _add_aliases(kb, title_to_id, max_entities_per_alias, min_occ, prior_prob_in
         if new_alias != previous_alias and previous_alias:
             # done reading the previous alias --> output
             if len(entities) > 0:
-                selected_entities = list()
-                prior_probs = list()
+                selected_entities = []
+                prior_probs = []
                 for ent_count, ent_string in zip(counts, entities):
                     if ent_string in wp_titles:
                         wd_id = title_to_id[ent_string]
@@ -157,8 +157,8 @@ def _add_aliases(kb, title_to_id, max_entities_per_alias, min_occ, prior_prob_in
                 except ValueError as e:
                     print(e)
             total_count = 0
-            counts = list()
-            entities = list()
+            counts = []
+            entities = []

         total_count += count
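The three hunks above all touch the same alias-grouping loop in _add_aliases. A self-contained sketch of that loop's logic follows: the "alias|count|entity" line format and the group-reset bookkeeping come from the diff, while turning counts into prior probabilities by dividing by the alias's total is inferred from the variable names and is an assumption, as are the example titles.

def read_prior_probs(lines):
    # Group consecutive "alias|count|entity" lines (assumed sorted by alias)
    # and yield each alias with its entities and assumed prior probabilities.
    previous_alias = None
    total_count = 0
    counts = []
    entities = []
    for line in lines:
        alias, count, entity = line.replace("\n", "").split(sep="|")
        count = int(count)
        if alias != previous_alias and previous_alias:
            # done reading the previous alias --> output
            yield previous_alias, [(e, c / total_count) for e, c in zip(entities, counts)]
            total_count = 0
            counts = []
            entities = []
        total_count += count
        counts.append(count)
        entities.append(entity)
        previous_alias = alias
    if previous_alias:
        yield previous_alias, [(e, c / total_count) for e, c in zip(entities, counts)]

# Example: 5/6 and 1/6 for the two "Obama" entities, 1.0 for "Paris".
print(list(read_prior_probs(["Obama|5|Barack_Obama", "Obama|1|Obama_(surname)", "Paris|3|Paris"])))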

View File

@@ -343,7 +343,7 @@ def read_training(nlp, training_dir, dev, limit):
                 # currently feeding the gold data one entity per sentence at a time
                 gold_start = int(start) - found_ent.sent.start_char
                 gold_end = int(end) - found_ent.sent.start_char
-                gold_entities = list()
+                gold_entities = []
                 gold_entities.append((gold_start, gold_end, wp_title))
                 gold = GoldParse(doc=sent, links=gold_entities)
                 data.append((sent, gold))
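The substantive lines in this hunk are the offset arithmetic: the annotated start and end arrive as article-level character offsets, and subtracting found_ent.sent.start_char re-bases them onto the single sentence handed to GoldParse. A toy trace, with all values invented for illustration:

start, end = "112", "124"        # article-level character offsets, as strings
sent_start_char = 100            # found_ent.sent.start_char
gold_start = int(start) - sent_start_char   # 12
gold_end = int(end) - sent_start_char       # 24
gold_entities = [(gold_start, gold_end, "Douglas_Adams")]  # hypothetical title
print(gold_entities)  # [(12, 24, 'Douglas_Adams')]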

View File

@@ -147,7 +147,7 @@ def run_pipeline():
     if train_pipe:
         print("STEP 6: training Entity Linking pipe", datetime.datetime.now())
         # define the size (nr of entities) of training and dev set
-        train_limit = 10000
+        train_limit = 5000
         dev_limit = 5000
         train_data = training_set_creator.read_training(nlp=nlp_2,
@@ -332,7 +332,7 @@ def _measure_baselines(data, kb):
         best_candidate = ""
         random_candidate = ""
         if candidates:
-            scores = list()
+            scores = []
             for c in candidates:
                 scores.append(c.prior_prob)
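A minimal sketch of the baselines this hunk feeds, under the assumption (consistent with the surrounding names, but not shown in the hunk) that the best candidate is simply the one with the highest prior probability while the random baseline samples uniformly. The Candidate namedtuple is a stand-in for the real candidate objects; the IDs are invented.

import random
from collections import namedtuple

Candidate = namedtuple("Candidate", ["entity_", "prior_prob"])  # stand-in type

def prior_baselines(candidates):
    # Best-prior baseline: highest prior probability wins;
    # random baseline: uniform choice among retrieved candidates.
    scores = [c.prior_prob for c in candidates]
    best_candidate = candidates[scores.index(max(scores))].entity_
    random_candidate = random.choice(candidates).entity_
    return best_candidate, random_candidate

print(prior_baselines([Candidate("Q42", 0.8), Candidate("Q5", 0.2)]))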

View File

@@ -1131,8 +1131,8 @@ class EntityLinker(Pipe):
             docs = [docs]
             golds = [golds]

-        context_docs = list()
-        entity_encodings = list()
+        context_docs = []
+        entity_encodings = []

         for doc, gold in zip(docs, golds):
             for entity in gold.links:
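The two list assignments at the top of this hunk are the tail of a guard that sits outside the hunk, presumably the usual spaCy pattern of accepting a single Doc as well as a batch. A sketch of that assumed guard, written as a standalone helper:

from spacy.tokens import Doc

def as_batch(docs, golds):
    # Assumed guard: wrap a lone Doc/GoldParse pair into one-element lists
    # so the later zip(docs, golds) treats both cases uniformly.
    if isinstance(docs, Doc):
        docs = [docs]
        golds = [golds]
    return docs, golds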
@@ -1198,8 +1198,8 @@ class EntityLinker(Pipe):
         self.require_model()
         self.require_kb()

-        final_entities = list()
-        final_kb_ids = list()
+        final_entities = []
+        final_kb_ids = []

         if not docs:
             return final_entities, final_kb_ids
@@ -1214,7 +1214,7 @@ class EntityLinker(Pipe):
            for ent in doc.ents:
                candidates = self.kb.get_candidates(ent.text)
                if candidates:
-                    scores = list()
+                    scores = []
                    for c in candidates:
                        prior_prob = c.prior_prob * self.prior_weight
                        kb_id = c.entity_
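Scoring-wise, the hunk shows only the prior term: each candidate's corpus prior is scaled by the pipe's prior_weight. Stripped of the context-similarity contribution handled in the elided lines, the visible part amounts to this sketch:

def score_by_prior(candidates, prior_weight):
    # Scale each candidate's prior probability by a tunable weight; the
    # context-similarity term the real pipe mixes in afterwards is omitted.
    kb_ids = []
    scores = []
    for c in candidates:
        kb_ids.append(c.entity_)
        scores.append(c.prior_prob * prior_weight)
    return kb_ids, scores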
@@ -1259,11 +1259,10 @@ class EntityLinker(Pipe):
         return self

     def rehearse(self, docs, sgd=None, losses=None, **config):
-        # TODO
-        pass
+        raise NotImplementedError

     def add_label(self, label):
-        pass
+        raise NotImplementedError


 class Sentencizer(object):
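The final two changes replace silent pass bodies with raise NotImplementedError, so calling an unsupported Pipe method now fails loudly instead of appearing to succeed. Demonstrated in isolation on a hypothetical class:

class IncompletePipe:
    def rehearse(self, docs, sgd=None, losses=None, **config):
        # Fail loudly: a bare `pass` would let callers believe the
        # rehearsal step actually ran.
        raise NotImplementedError

try:
    IncompletePipe().rehearse(docs=[])
except NotImplementedError:
    print("rehearse() is not implemented")  # the failure is now visible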