mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
Fix handling of old entity ruler files
The old format expected an `entity_ruler.jsonl` file in the top-level model directory, so the file to look for is the path passed to from_disk by default (model path plus component name), but with the suffix ".jsonl".
This commit is contained in:
parent
874d914a44
commit
570ab1f481
|
@ -266,8 +266,9 @@ class EntityRuler(object):
|
||||||
DOCS: https://spacy.io/api/entityruler#from_disk
|
DOCS: https://spacy.io/api/entityruler#from_disk
|
||||||
"""
|
"""
|
||||||
path = ensure_path(path)
|
path = ensure_path(path)
|
||||||
if path.is_file():
|
depr_patterns_path = path.with_suffix(".jsonl")
|
||||||
patterns = srsly.read_jsonl(path)
|
if depr_patterns_path.is_file():
|
||||||
|
patterns = srsly.read_jsonl(depr_patterns_path)
|
||||||
self.add_patterns(patterns)
|
self.add_patterns(patterns)
|
||||||
else:
|
else:
|
||||||
cfg = {}
|
cfg = {}
|
||||||
|
|
|
@ -62,10 +62,9 @@ def test_entity_ruler_from_disk_old_format_safe(patterns, en_vocab):
|
||||||
nlp = Language(vocab=en_vocab)
|
nlp = Language(vocab=en_vocab)
|
||||||
ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True)
|
ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True)
|
||||||
with make_tempdir() as tmpdir:
|
with make_tempdir() as tmpdir:
|
||||||
out_file = tmpdir / "entity_ruler.jsonl"
|
out_file = tmpdir / "entity_ruler"
|
||||||
srsly.write_jsonl(out_file, ruler.patterns)
|
srsly.write_jsonl(out_file.with_suffix(".jsonl"), ruler.patterns)
|
||||||
new_ruler = EntityRuler(nlp)
|
new_ruler = EntityRuler(nlp).from_disk(out_file)
|
||||||
new_ruler = new_ruler.from_disk(out_file)
|
|
||||||
for pattern in ruler.patterns:
|
for pattern in ruler.patterns:
|
||||||
assert pattern in new_ruler.patterns
|
assert pattern in new_ruler.patterns
|
||||||
assert len(new_ruler) == len(ruler)
|
assert len(new_ruler) == len(ruler)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user