* Clean up handling of dep_strings and ent_strings, using StringStore to encode the label names.

This commit is contained in:
Matthew Honnibal 2015-03-14 11:10:27 -04:00
parent 9061bbaf61
commit 801bf14f4f

View File

@ -104,14 +104,16 @@ class English(object):
@property @property
def parser(self): def parser(self):
if self._parser is None: if self._parser is None:
self._parser = GreedyParser(path.join(self._data_dir, 'deps'), self._parser = GreedyParser(self.vocab.strings,
path.join(self._data_dir, 'deps'),
self.ParserTransitionSystem) self.ParserTransitionSystem)
return self._parser return self._parser
@property @property
def entity(self): def entity(self):
if self._entity is None: if self._entity is None:
self._entity = GreedyParser(path.join(self._data_dir, 'ner'), self._entity = GreedyParser(self.vocab.strings,
path.join(self._data_dir, 'ner'),
self.EntityTransitionSystem) self.EntityTransitionSystem)
return self._entity return self._entity
@ -180,13 +182,7 @@ class English(object):
if parse and self.has_parser_model: if parse and self.has_parser_model:
self.parser(tokens) self.parser(tokens)
if entity and self.has_entity_model: if entity and self.has_entity_model:
# TODO: Clean this up
self.entity(tokens) self.entity(tokens)
ent_strings = [None] * (max(self.entity.moves.label_ids.values()) + 1)
for label, i in self.entity.moves.label_ids.items():
if i >= 0:
ent_strings[i] = label
tokens._ent_strings = tuple(ent_strings)
return tokens return tokens
@property @property