mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
* Add error when trying to access head while is_parsed is not set
This commit is contained in:
parent
e2ea0fb47a
commit
7431c133d8
|
@ -253,6 +253,7 @@ cdef class EnPosTagger:
|
|||
tokens.data[i].tag = arg_max(scores, self.model.n_classes)
|
||||
self.set_morph(i, tokens.data)
|
||||
tokens._tag_strings = self.tag_names
|
||||
tokens.is_tagged = True
|
||||
|
||||
def train(self, Tokens tokens, object golds):
|
||||
cdef int i
|
||||
|
|
|
@ -83,6 +83,7 @@ cdef class GreedyParser:
|
|||
tokens._dep_strings = [None] * len(self.moves.label_ids)
|
||||
for label, id_ in self.moves.label_ids.items():
|
||||
tokens._dep_strings[id_] = label
|
||||
tokens.is_parsed = True
|
||||
return 0
|
||||
|
||||
def train_sent(self, Tokens tokens, list gold_heads, list gold_labels):
|
||||
|
|
|
@ -38,6 +38,9 @@ cdef class Tokens:
|
|||
cdef list _tag_strings
|
||||
cdef list _dep_strings
|
||||
|
||||
cdef public bint is_tagged
|
||||
cdef public bint is_parsed
|
||||
|
||||
cdef int length
|
||||
cdef int max_length
|
||||
|
||||
|
@ -66,6 +69,5 @@ cdef class Token:
|
|||
cdef readonly univ_tag_t pos
|
||||
cdef readonly attr_t tag
|
||||
cdef readonly attr_t dep
|
||||
|
||||
cdef readonly ndarray repvec
|
||||
cdef readonly unicode string
|
||||
|
|
|
@ -84,6 +84,8 @@ cdef class Tokens:
|
|||
self.data = data_start + PADDING
|
||||
self.max_length = size
|
||||
self.length = 0
|
||||
self.is_tagged = False
|
||||
self.is_parsed = False
|
||||
self._tag_strings = [] # These will be set by the POS tagger and parser
|
||||
self._dep_strings = [] # The strings are arbitrary and model-specific.
|
||||
|
||||
|
@ -258,6 +260,10 @@ cdef class Token:
|
|||
return Token(self._seq, self.i + i)
|
||||
|
||||
def child(self, int i=1):
|
||||
if not self._seq.is_parsed:
|
||||
msg = _parse_unset_error
|
||||
raise AttributeError(msg)
|
||||
|
||||
cdef const TokenC* t = &self._seq.data[self.i]
|
||||
if i == 0:
|
||||
return self
|
||||
|
@ -275,6 +281,9 @@ cdef class Token:
|
|||
property head:
|
||||
"""The token predicted by the parser to be the head of the current token."""
|
||||
def __get__(self):
|
||||
if not self._seq.is_parsed:
|
||||
msg = _parse_unset_error
|
||||
raise AttributeError(msg)
|
||||
cdef const TokenC* t = &self._seq.data[self.i]
|
||||
return Token(self._seq, self.i + t.head)
|
||||
|
||||
|
@ -337,3 +346,11 @@ cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil:
|
|||
if n < 1:
|
||||
return i
|
||||
return 0
|
||||
|
||||
|
||||
_parse_unset_error = """Text has not been parsed, so cannot access head, child or sibling.
|
||||
|
||||
Check that the parser data is installed.
|
||||
Check that the parse=True argument was set in the call to English.__call__
|
||||
"""
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user