* Add text and text_with_ws attributes.

This commit is contained in:
Matthew Honnibal 2015-09-13 10:27:42 +10:00
parent 968d26a6cc
commit c08f10083c
3 changed files with 28 additions and 0 deletions

View File

@ -122,6 +122,14 @@ cdef class Doc:
def string(self):
    """Concatenate the `string` attribute of every item yielded by
    iterating over `self`, with no separator between them.
    """
    pieces = [tok.string for tok in self]
    return u''.join(pieces)
@property
def text_with_ws(self):
    """Concatenate the `text_with_ws` attribute of every item yielded by
    iterating over `self`, with no separator between them.

    The attribute is spelled `text_with_ws` so that it matches the
    property of the same name on Span and Token; the earlier spelling
    `text_wth_ws` was a typo that left `text_with_ws` undefined here.

    Returns an empty string when iteration yields nothing.
    """
    return u''.join(t.text_with_ws for t in self)
@property
def text(self):
    """Join the `text` attribute of every item yielded by iterating over
    `self`, separating consecutive items with a single space.
    """
    words = [tok.text for tok in self]
    return u' '.join(words)
property ents:
def __get__(self):
"""Yields named-entity Span objects.

View File

@ -52,6 +52,14 @@ cdef class Span:
def merge(self, unicode tag, unicode lemma, unicode ent_type):
    """Forward a merge request for this span to `self._seq.merge`.

    The range passed on starts at the `idx` of the span's first item and
    ends at the `idx` of its last item plus that item's length
    (presumably character offsets -- confirm against `_seq.merge`).
    `tag`, `lemma` and `ent_type` are passed through unchanged.

    The result of `self._seq.merge` is not returned.
    """
    start = self[0].idx
    last = self[-1]
    end = last.idx + len(last)
    self._seq.merge(start, end, tag, lemma, ent_type)
property text:
    """The `text` of every item in the span, joined by single spaces."""
    def __get__(self):
        words = [tok.text for tok in self]
        return u' '.join(words)
property text_with_ws:
    """The `text_with_ws` of every item in the span, concatenated with no
    separator.
    """
    def __get__(self):
        pieces = [tok.text_with_ws for tok in self]
        return u''.join(pieces)
property root:
"""The first ancestor of the first word of the span that has its head
outside the span.

View File

@ -61,6 +61,18 @@ cdef class Token:
else:
return orth
property text:
    """Read-only alias: returns `self.orth_` unchanged."""
    def __get__(self):
        return self.orth_
property text_with_ws:
    """The token's orth string, looked up in `self.vocab.strings` by
    `self.c.lex.orth`, followed by a single space when `self.c.spacy`
    is set.
    """
    def __get__(self):
        cdef unicode word = self.vocab.strings[self.c.lex.orth]
        # `spacy` flags whether a space is appended after the word.
        return (word + u' ') if self.c.spacy else word
property prob:
def __get__(self):
return self.c.lex.prob