* Add text and text_with_ws attributes.

This commit is contained in:
Matthew Honnibal 2015-09-13 10:27:42 +10:00
parent 968d26a6cc
commit c08f10083c
3 changed files with 28 additions and 0 deletions

View File

@ -122,6 +122,14 @@ cdef class Doc:
def string(self): def string(self):
return u''.join([t.string for t in self]) return u''.join([t.string for t in self])
@property
def text_with_ws(self):
    """Return the concatenated ``text_with_ws`` of every token.

    Iterates over this object and joins each item's ``text_with_ws``
    attribute with no separator. Returns an empty string when iteration
    yields nothing.
    """
    return u''.join([t.text_with_ws for t in self])

@property
def text_wth_ws(self):
    """Misspelled alias of ``text_with_ws``.

    Kept so that callers using the original (typo'd) attribute name keep
    working; it simply forwards to ``text_with_ws``.
    """
    return self.text_with_ws
@property
def text(self):
    """Return the ``text`` of every token, joined by single spaces.

    Iterates over this object, collects each item's ``text`` attribute and
    joins them with ``u' '``. Returns an empty string when iteration yields
    nothing.
    """
    words = [token.text for token in self]
    return u' '.join(words)
property ents: property ents:
def __get__(self): def __get__(self):
"""Yields named-entity Span objects. """Yields named-entity Span objects.

View File

@ -52,6 +52,14 @@ cdef class Span:
def merge(self, unicode tag, unicode lemma, unicode ent_type): def merge(self, unicode tag, unicode lemma, unicode ent_type):
self._seq.merge(self[0].idx, self[-1].idx + len(self[-1]), tag, lemma, ent_type) self._seq.merge(self[0].idx, self[-1].idx + len(self[-1]), tag, lemma, ent_type)
property text:
    """The ``text`` of every token in the span, joined by single spaces."""
    def __get__(self):
        # Collect the per-token strings first, then join with one space.
        words = [token.text for token in self]
        return u' '.join(words)
property text_with_ws:
    """The ``text_with_ws`` of every token in the span, concatenated
    with no separator.
    """
    def __get__(self):
        pieces = [token.text_with_ws for token in self]
        return u''.join(pieces)
property root: property root:
"""The first ancestor of the first word of the span that has its head """The first ancestor of the first word of the span that has its head
outside the span. outside the span.

View File

@ -61,6 +61,18 @@ cdef class Token:
else: else:
return orth return orth
property text:
    """The token's string; this is the value of ``self.orth_``."""
    def __get__(self):
        token_text = self.orth_
        return token_text
property text_with_ws:
    """The token's orth string, followed by a single space when the
    token's ``spacy`` flag is set.
    """
    def __get__(self):
        # Look the string up in the vocab's string store by the lexeme's orth ID.
        cdef unicode orth = self.vocab.strings[self.c.lex.orth]
        # No trailing-space flag: return the bare string.
        if not self.c.spacy:
            return orth
        return orth + u' '
property prob: property prob:
def __get__(self): def __get__(self):
return self.c.lex.prob return self.c.lex.prob