mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 13:47:13 +03:00
2d249a9502
* fix overflow error on windows
* more documentation & logging fixes
* md fix
* 3 different limit parameters to play with execution time
* bug fixes directory locations
* small fixes
* exclude dev test articles from prior probabilities stats
* small fixes
* filtering wikidata entities, removing numeric and meta items
* adding aliases from wikidata also to the KB
* fix adding WD aliases
* adding also new aliases to previously added entities
* fixing comma's
* small doc fixes
* adding subclassof filtering
* append alias functionality in KB
* prevent appending the same entity-alias pair
* fix for appending WD aliases
* remove date filter
* remove unnecessary import
* small corrections and reformatting
* remove WD aliases for now (too slow)
* removing numeric entities from training and evaluation
* small fixes
* shortcut during prediction if there is only one candidate
* add counts and fscore logging, remove FP NER from evaluation
* fix entity_linker.predict to take docs instead of single sentences
* remove enumeration sentences from the WP dataset
* entity_linker.update to process full doc instead of single sentence
* spelling corrections and dump locations in readme
* NLP IO fix
* reading KB is unnecessary at the end of the pipeline
* small logging fix
* remove empty files
13 lines
371 B
Python
13 lines
371 B
Python
# Shared string constants for this package. Every value below is a bare
# name with no directory component.
# NOTE(review): presumably callers join these onto a base directory of
# their choosing -- confirm against the call sites.

# Names of the training-data file, the knowledge base (KB), and the
# model directories.
TRAINING_DATA_FILE = "gold_entities.jsonl"
KB_FILE = "kb"
KB_MODEL_DIR = "nlp_kb"
OUTPUT_MODEL_DIR = "nlp"

# Names of the intermediate CSV files.
PRIOR_PROB_PATH = "prior_prob.csv"
ENTITY_DEFS_PATH = "entity_defs.csv"
ENTITY_FREQ_PATH = "entity_freq.csv"
ENTITY_ALIAS_PATH = "entity_alias.csv"
ENTITY_DESCR_PATH = "entity_descriptions.csv"

# %-style format string whose fields (asctime, levelname, name, message)
# match standard `logging` LogRecord attribute names.
# NOTE(review): presumably passed to logging.basicConfig / Formatter -- confirm.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(name)s - %(message)s'