Fix handling of old entity ruler files

The deprecated format is expected to be an `entity_ruler.jsonl` file in the top-level model directory, i.e. the path passed to `from_disk` by default (model path plus component name), but with the suffix ".jsonl".
This commit is contained in:
Ines Montani 2019-07-10 12:14:12 +02:00
parent 874d914a44
commit 570ab1f481
2 changed files with 6 additions and 6 deletions

View File

@ -266,8 +266,9 @@ class EntityRuler(object):
DOCS: https://spacy.io/api/entityruler#from_disk DOCS: https://spacy.io/api/entityruler#from_disk
""" """
path = ensure_path(path) path = ensure_path(path)
if path.is_file(): depr_patterns_path = path.with_suffix(".jsonl")
patterns = srsly.read_jsonl(path) if depr_patterns_path.is_file():
patterns = srsly.read_jsonl(depr_patterns_path)
self.add_patterns(patterns) self.add_patterns(patterns)
else: else:
cfg = {} cfg = {}

View File

@ -62,10 +62,9 @@ def test_entity_ruler_from_disk_old_format_safe(patterns, en_vocab):
nlp = Language(vocab=en_vocab) nlp = Language(vocab=en_vocab)
ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True) ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True)
with make_tempdir() as tmpdir: with make_tempdir() as tmpdir:
out_file = tmpdir / "entity_ruler.jsonl" out_file = tmpdir / "entity_ruler"
srsly.write_jsonl(out_file, ruler.patterns) srsly.write_jsonl(out_file.with_suffix(".jsonl"), ruler.patterns)
new_ruler = EntityRuler(nlp) new_ruler = EntityRuler(nlp).from_disk(out_file)
new_ruler = new_ruler.from_disk(out_file)
for pattern in ruler.patterns: for pattern in ruler.patterns:
assert pattern in new_ruler.patterns assert pattern in new_ruler.patterns
assert len(new_ruler) == len(ruler) assert len(new_ruler) == len(ruler)