mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Fix Issue #599, by considering empty documents to be parsed and tagged. Implementation is a bit dodgy.
This commit is contained in:
parent
b6b01d4680
commit
f292f7f0e6
|
@@ -151,6 +151,11 @@ cdef class Doc:
|
||||||
# must be created.
|
# must be created.
|
||||||
self.push_back(
|
self.push_back(
|
||||||
<const LexemeC*>self.vocab.get(self.mem, orth), has_space)
|
<const LexemeC*>self.vocab.get(self.mem, orth), has_space)
|
||||||
|
# Tough to decide on policy for this. Is an empty doc tagged and parsed?
|
||||||
|
# There's no information we'd like to add to it, so I guess so?
|
||||||
|
if self.length == 0:
|
||||||
|
self.is_tagged = True
|
||||||
|
self.is_parsed = True
|
||||||
|
|
||||||
def __getitem__(self, object i):
|
def __getitem__(self, object i):
|
||||||
'''
|
'''
|
||||||
|
@@ -430,6 +435,10 @@ cdef class Doc:
|
||||||
yield Span(self, start, self.length)
|
yield Span(self, start, self.length)
|
||||||
|
|
||||||
cdef int push_back(self, LexemeOrToken lex_or_tok, bint has_space) except -1:
|
cdef int push_back(self, LexemeOrToken lex_or_tok, bint has_space) except -1:
|
||||||
|
if self.length == 0:
|
||||||
|
# Flip these to false when we see the first token.
|
||||||
|
self.is_tagged = False
|
||||||
|
self.is_parsed = False
|
||||||
if self.length == self.max_length:
|
if self.length == self.max_length:
|
||||||
self._realloc(self.length * 2)
|
self._realloc(self.length * 2)
|
||||||
cdef TokenC* t = &self.c[self.length]
|
cdef TokenC* t = &self.c[self.length]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user