diff --git a/examples/pipeline/wikidata_entity_linking.py b/examples/pipeline/wikidata_entity_linking.py
index 5065648ef..cd6cc7c40 100644
--- a/examples/pipeline/wikidata_entity_linking.py
+++ b/examples/pipeline/wikidata_entity_linking.py
@@ -10,6 +10,13 @@ import datetime
 import bz2
 
 from spacy.kb import KnowledgeBase
+# TODO: remove hardcoded paths
+WIKIDATA_JSON = 'C:/Users/Sofie/Documents/data/wikidata/wikidata-20190304-all.json.bz2'
+ENWIKI_DUMP = 'C:/Users/Sofie/Documents/data/wikipedia/enwiki-20190320-pages-articles-multistream.xml.bz2'
+ENWIKI_INDEX = 'C:/Users/Sofie/Documents/data/wikipedia/enwiki-20190320-pages-articles-multistream-index.txt.bz2'
+PRIOR_PROB = 'C:/Users/Sofie/Documents/data/wikipedia/prior_prob.csv'
+
+
 # these will/should be matched ignoring case
 wiki_namespaces = ["b", "betawikiversity", "Book", "c", "Category", "Commons",
                    "d", "dbdump", "download", "Draft", "Education", "Foundation",
@@ -28,16 +35,14 @@ wiki_namespaces = ["b", "betawikiversity", "Book", "c", "Category", "Commons",
 map_alias_to_link = dict()
 
 
-def create_kb(vocab):
+def create_kb(vocab, max_entities_per_alias, min_occ):
     kb = KnowledgeBase(vocab=vocab)
+
+    _add_entities(kb)
+    _add_aliases(kb, max_entities_per_alias, min_occ)
+
     # _read_wikidata()
-    _read_wikipedia()
-
-    # adding entities
-    # kb.add_entity(entity=entity, prob=prob)
-
-    # adding aliases
-    # kb.add_alias(alias=alias, entities=[entity_0, entity_1, entity_2], probabilities=[0.6, 0.1, 0.2])
+    # _read_wikipedia()
 
     print()
     print("kb size:", len(kb), kb.get_size_entities(), kb.get_size_aliases())
@@ -45,6 +50,66 @@ def create_kb(vocab):
     return kb
 
 
+def _add_entities(kb):
+
+    kb.add_entity(entity="Earthquake", prob=0.342)
+    kb.add_entity(entity="2010 haiti earthquake", prob=0.1)
+    kb.add_entity(entity="1906 san francisco earthquake", prob=0.1)
+    kb.add_entity(entity="2011 christchurch earthquake", prob=0.1)
+
+    kb.add_entity(entity="Soft drink", prob=0.342)
+
+    print("added", kb.get_size_entities(), "entities:", kb.get_entity_strings())
+
+
+def _add_aliases(kb, max_entities_per_alias, min_occ):
+    all_entities = kb.get_entity_strings()
+    # adding aliases with prior probabilities
+    with open(PRIOR_PROB, mode='r', encoding='utf8') as prior_file:
+        # skip header
+        prior_file.readline()
+        line = prior_file.readline()
+        # we can read this file sequentially, it's sorted by alias, and then by count
+        previous_alias = None
+        total_count = 0
+        counts = list()
+        entities = list()
+        while line:
+            splits = line.replace('\n', "").split(sep='|')
+            new_alias = splits[0]
+            count = int(splits[1])
+            entity = splits[2]
+
+            if new_alias != previous_alias and previous_alias:
+                # done reading the previous alias --> output
+                if len(entities) > 0:
+                    selected_entities = list()
+                    prior_probs = list()
+                    for ent_count, ent_string in zip(counts, entities):
+                        if ent_string in all_entities:
+                            p_entity_given_alias = ent_count / total_count
+                            selected_entities.append(ent_string)
+                            prior_probs.append(p_entity_given_alias)
+
+                    if selected_entities:
+                        kb.add_alias(alias=previous_alias, entities=selected_entities, probabilities=prior_probs)
+                total_count = 0
+                counts = list()
+                entities = list()
+
+            total_count += count
+
+            if len(entities) < max_entities_per_alias and count >= min_occ:
+                counts.append(count)
+                entities.append(entity)
+            previous_alias = new_alias
+
+            line = prior_file.readline()
+
+    print()
+    print("added", kb.get_size_aliases(), "aliases:", kb.get_alias_strings())
+
+
 def _read_wikidata():
     """ Read the JSON wiki data """
     # TODO remove hardcoded path
@@ -53,7 +118,7 @@ def _read_wikidata():
     properties = {'P31'}
     sites = {'enwiki'}
 
-    with bz2.open('C:/Users/Sofie/Documents/data/wikidata/wikidata-20190304-all.json.bz2', mode='rb') as file:
+    with bz2.open(WIKIDATA_JSON, mode='rb') as file:
         line = file.readline()
         cnt = 1
         while line and cnt < 100000:
@@ -124,8 +189,7 @@ def _read_wikipedia_prior_probs():
 
     ns_regex = re.compile(ns_regex, re.IGNORECASE)
 
-    # TODO remove hardcoded path
-    with bz2.open('C:/Users/Sofie/Documents/data/wikipedia/enwiki-20190320-pages-articles-multistream.xml.bz2', mode='rb') as file:
+    with bz2.open(ENWIKI_DUMP, mode='rb') as file:
         line = file.readline()
         cnt = 0
         while line:
@@ -159,9 +223,8 @@ def _read_wikipedia_prior_probs():
             line = file.readline()
            cnt += 1
 
-    # only print aliases with more than one potential entity
-    # TODO remove hardcoded path
-    with open('C:/Users/Sofie/Documents/data/wikipedia/prior_prob.csv', mode='w', encoding='utf8') as outputfile:
+    # write all aliases and their entities and occurrences to file
+    with open(PRIOR_PROB, mode='w', encoding='utf8') as outputfile:
        outputfile.write("alias" + "|" + "count" + "|" + "entity" + "\n")
        for alias, alias_dict in sorted(map_alias_to_link.items(), key=lambda x: x[0]):
            for entity, count in sorted(alias_dict.items(), key=lambda x: x[1], reverse=True):
@@ -181,12 +244,11 @@ def _store_alias(alias, entity):
 
        alias_dict[clean_entity] = entity_count + 1
        map_alias_to_link[alias] = alias_dict
 
+
 def _read_wikipedia():
     """ Read the XML wikipedia data """
-    # TODO remove hardcoded path
-    # with bz2.open('C:/Users/Sofie/Documents/data/wikipedia/enwiki-20190320-pages-articles-multistream-index.txt.bz2', mode='rb') as file:
-    with bz2.open('C:/Users/Sofie/Documents/data/wikipedia/enwiki-20190320-pages-articles-multistream.xml.bz2', mode='rb') as file:
+    with bz2.open(ENWIKI_DUMP, mode='rb') as file:
         line = file.readline()
         cnt = 1
         article_text = ""
@@ -240,7 +302,6 @@ def _store_wp_article(article_id, article_title, article_text):
     print()
 
 
-
 def _get_clean_wp_text(article_text):
     # TODO: compile the regular expressions
 
@@ -300,10 +361,13 @@ def add_el(kb, nlp):
 
 if __name__ == "__main__":
-    _read_wikipedia_prior_probs()
+    # STEP 1: create prior probabilities from WP
+    # run only once!
+    # _read_wikipedia_prior_probs()
 
-    # nlp = spacy.load('en_core_web_sm')
-    # my_kb = create_kb(nlp.vocab)
+    # STEP 2: create KB
+    nlp = spacy.load('en_core_web_sm')
+    my_kb = create_kb(nlp.vocab, max_entities_per_alias=10, min_occ=5)
 
     # add_el(my_kb, nlp)
 
     # clean_text = "[[File:smomething]] jhk"
diff --git a/spacy/kb.pyx b/spacy/kb.pyx
index 97e86d01f..8a1710a9c 100644
--- a/spacy/kb.pyx
+++ b/spacy/kb.pyx
@@ -54,8 +54,14 @@ cdef class KnowledgeBase:
     def get_size_entities(self):
         return self._entries.size() - 1 # not counting dummy element on index 0
 
+    def get_entity_strings(self):
+        return [self.vocab.strings[x] for x in self._entry_index][1:] # removing the dummy element on index 0
+
     def get_size_aliases(self):
-        return self._aliases_table.size() - 1 # not counting dummy element on index 0
+        return self._aliases_table.size() - 1 # not counting dummy element on index 0
+
+    def get_alias_strings(self):
+        return [self.vocab.strings[x] for x in self._alias_index][1:] # removing the dummy element on index 0
 
     def add_entity(self, unicode entity, float prob=0.5, vectors=None, features=None):
         """