mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-11 16:52:21 +03:00
* Fix loading of gazetteer.json file
This commit is contained in:
parent
9c667b7f15
commit
cd7d1682cd
|
@@ -101,21 +101,28 @@ cdef class Matcher:
|
||||||
|
|
||||||
def __init__(self, vocab, patterns):
|
def __init__(self, vocab, patterns):
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self.patterns = <Pattern**>self.mem.alloc(len(patterns), sizeof(Pattern*))
|
n_patterns = sum([len(specs) for etype, attrs, specs in patterns.values()])
|
||||||
for i, (entity_key, (etype, attrs, specs)) in enumerate(sorted(patterns.items())):
|
self.patterns = <Pattern**>self.mem.alloc(n_patterns, sizeof(Pattern*))
|
||||||
|
cdef int i = 0
|
||||||
|
for entity_key, (etype, attrs, specs) in sorted(patterns.items()):
|
||||||
if isinstance(entity_key, basestring):
|
if isinstance(entity_key, basestring):
|
||||||
entity_key = vocab.strings[entity_key]
|
entity_key = vocab.strings[entity_key]
|
||||||
if isinstance(etype, basestring):
|
if isinstance(etype, basestring):
|
||||||
etype = vocab.strings[etype]
|
etype = vocab.strings[etype]
|
||||||
specs = _convert_strings(specs, vocab.strings)
|
# TODO: Do something more clever about multiple patterns for single
|
||||||
self.patterns[i] = init_pattern(self.mem, specs, etype)
|
# entity
|
||||||
|
for spec in specs:
|
||||||
|
spec = _convert_strings(spec, vocab.strings)
|
||||||
|
self.patterns[i] = init_pattern(self.mem, spec, etype)
|
||||||
|
i += 1
|
||||||
self.n_patterns = len(patterns)
|
self.n_patterns = len(patterns)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_dir(cls, vocab, data_dir):
|
def from_dir(cls, vocab, data_dir):
|
||||||
patterns_loc = path.join(data_dir, 'ner', 'patterns.json')
|
patterns_loc = path.join(data_dir, 'vocab', 'gazetteer.json')
|
||||||
if path.exists(patterns_loc):
|
if path.exists(patterns_loc):
|
||||||
patterns = json.loads(open(patterns_loc))
|
patterns_data = open(patterns_loc).read()
|
||||||
|
patterns = json.loads(patterns_data)
|
||||||
return cls(vocab, patterns)
|
return cls(vocab, patterns)
|
||||||
else:
|
else:
|
||||||
return cls(vocab, {})
|
return cls(vocab, {})
|
||||||
|
|
Loading…
Reference in New Issue
Block a user