* Raise an error when head (or child) is accessed on tokens that have not been parsed (is_parsed is False)

This commit is contained in:
Matthew Honnibal 2015-01-25 15:33:54 +11:00
parent e2ea0fb47a
commit 7431c133d8
4 changed files with 22 additions and 1 deletions

View File

@ -253,6 +253,7 @@ cdef class EnPosTagger:
tokens.data[i].tag = arg_max(scores, self.model.n_classes) tokens.data[i].tag = arg_max(scores, self.model.n_classes)
self.set_morph(i, tokens.data) self.set_morph(i, tokens.data)
tokens._tag_strings = self.tag_names tokens._tag_strings = self.tag_names
tokens.is_tagged = True
def train(self, Tokens tokens, object golds): def train(self, Tokens tokens, object golds):
cdef int i cdef int i

View File

@ -83,6 +83,7 @@ cdef class GreedyParser:
tokens._dep_strings = [None] * len(self.moves.label_ids) tokens._dep_strings = [None] * len(self.moves.label_ids)
for label, id_ in self.moves.label_ids.items(): for label, id_ in self.moves.label_ids.items():
tokens._dep_strings[id_] = label tokens._dep_strings[id_] = label
tokens.is_parsed = True
return 0 return 0
def train_sent(self, Tokens tokens, list gold_heads, list gold_labels): def train_sent(self, Tokens tokens, list gold_heads, list gold_labels):

View File

@ -38,6 +38,9 @@ cdef class Tokens:
cdef list _tag_strings cdef list _tag_strings
cdef list _dep_strings cdef list _dep_strings
cdef public bint is_tagged
cdef public bint is_parsed
cdef int length cdef int length
cdef int max_length cdef int max_length
@ -66,6 +69,5 @@ cdef class Token:
cdef readonly univ_tag_t pos cdef readonly univ_tag_t pos
cdef readonly attr_t tag cdef readonly attr_t tag
cdef readonly attr_t dep cdef readonly attr_t dep
cdef readonly ndarray repvec cdef readonly ndarray repvec
cdef readonly unicode string cdef readonly unicode string

View File

@ -84,6 +84,8 @@ cdef class Tokens:
self.data = data_start + PADDING self.data = data_start + PADDING
self.max_length = size self.max_length = size
self.length = 0 self.length = 0
self.is_tagged = False
self.is_parsed = False
self._tag_strings = [] # These will be set by the POS tagger and parser self._tag_strings = [] # These will be set by the POS tagger and parser
self._dep_strings = [] # The strings are arbitrary and model-specific. self._dep_strings = [] # The strings are arbitrary and model-specific.
@ -258,6 +260,10 @@ cdef class Token:
return Token(self._seq, self.i + i) return Token(self._seq, self.i + i)
def child(self, int i=1): def child(self, int i=1):
if not self._seq.is_parsed:
msg = _parse_unset_error
raise AttributeError(msg)
cdef const TokenC* t = &self._seq.data[self.i] cdef const TokenC* t = &self._seq.data[self.i]
if i == 0: if i == 0:
return self return self
@ -275,6 +281,9 @@ cdef class Token:
property head: property head:
"""The token predicted by the parser to be the head of the current token.""" """The token predicted by the parser to be the head of the current token."""
def __get__(self): def __get__(self):
if not self._seq.is_parsed:
msg = _parse_unset_error
raise AttributeError(msg)
cdef const TokenC* t = &self._seq.data[self.i] cdef const TokenC* t = &self._seq.data[self.i]
return Token(self._seq, self.i + t.head) return Token(self._seq, self.i + t.head)
@ -337,3 +346,11 @@ cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil:
if n < 1: if n < 1:
return i return i
return 0 return 0
# Message raised as an AttributeError by Token.child and Token.head when the
# owning Tokens sequence has is_parsed set to False.
# NOTE(review): the text also mentions sibling access, but only the head and
# child guards are visible in this change -- confirm a sibling guard exists.
_parse_unset_error = """Text has not been parsed, so cannot access head, child or sibling.
Check that the parser data is installed.
Check that the parse=True argument was set in the call to English.__call__
"""