* Raise an error on attempts to access head when the tokens have not been parsed (is_parsed is False)

This commit is contained in:
Matthew Honnibal 2015-01-25 15:33:54 +11:00
parent e2ea0fb47a
commit 7431c133d8
4 changed files with 22 additions and 1 deletions

View File

@ -253,6 +253,7 @@ cdef class EnPosTagger:
tokens.data[i].tag = arg_max(scores, self.model.n_classes)
self.set_morph(i, tokens.data)
tokens._tag_strings = self.tag_names
tokens.is_tagged = True
def train(self, Tokens tokens, object golds):
cdef int i

View File

@ -83,6 +83,7 @@ cdef class GreedyParser:
tokens._dep_strings = [None] * len(self.moves.label_ids)
for label, id_ in self.moves.label_ids.items():
tokens._dep_strings[id_] = label
tokens.is_parsed = True
return 0
def train_sent(self, Tokens tokens, list gold_heads, list gold_labels):

View File

@ -38,6 +38,9 @@ cdef class Tokens:
cdef list _tag_strings
cdef list _dep_strings
cdef public bint is_tagged
cdef public bint is_parsed
cdef int length
cdef int max_length
@ -66,6 +69,5 @@ cdef class Token:
cdef readonly univ_tag_t pos
cdef readonly attr_t tag
cdef readonly attr_t dep
cdef readonly ndarray repvec
cdef readonly unicode string

View File

@ -84,6 +84,8 @@ cdef class Tokens:
self.data = data_start + PADDING
self.max_length = size
self.length = 0
self.is_tagged = False
self.is_parsed = False
self._tag_strings = [] # These will be set by the POS tagger and parser
self._dep_strings = [] # The strings are arbitrary and model-specific.
@ -258,6 +260,10 @@ cdef class Token:
return Token(self._seq, self.i + i)
def child(self, int i=1):
if not self._seq.is_parsed:
msg = _parse_unset_error
raise AttributeError(msg)
cdef const TokenC* t = &self._seq.data[self.i]
if i == 0:
return self
@ -275,6 +281,9 @@ cdef class Token:
property head:
    """Return the token that the parser predicted as this token's head.

    The head is stored on the underlying TokenC struct as an offset
    relative to this token's own index, so it is resolved by adding
    that offset to ``self.i``.

    Raises AttributeError when the owning sequence is not flagged as
    parsed (``is_parsed`` is false).
    """
    def __get__(self):
        # Refuse to read the head offset unless the parser has run.
        if not self._seq.is_parsed:
            raise AttributeError(_parse_unset_error)
        cdef const TokenC* tok = &self._seq.data[self.i]
        return Token(self._seq, self.i + tok.head)
@ -337,3 +346,11 @@ cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil:
if n < 1:
return i
return 0
# Message for the AttributeError raised when parse-dependent Token accessors
# (e.g. head, child) are used on a sequence whose is_parsed flag is not set.
_parse_unset_error = (
    "Text has not been parsed, so cannot access head, child or sibling.\n"
    "Check that the parser data is installed.\n"
    "Check that the parse=True argument was set in the call to English.__call__\n"
)