diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 16f0d2f46..f9552b6eb 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -122,6 +122,16 @@ cdef class Doc:
     def string(self):
         return u''.join([t.string for t in self])
 
+    @property
+    def text_with_ws(self):
+        """The text_with_ws of every token in the document, concatenated."""
+        return u''.join([t.text_with_ws for t in self])
+
+    @property
+    def text(self):
+        """The text of every token in the document, joined by single spaces."""
+        return u' '.join([t.text for t in self])
+
     property ents:
         def __get__(self):
             """Yields named-entity Span objects.
diff --git a/spacy/tokens/spans.pyx b/spacy/tokens/spans.pyx
index e2aa1a7f9..d9e4fbf0e 100644
--- a/spacy/tokens/spans.pyx
+++ b/spacy/tokens/spans.pyx
@@ -52,6 +52,16 @@ cdef class Span:
     def merge(self, unicode tag, unicode lemma, unicode ent_type):
         self._seq.merge(self[0].idx, self[-1].idx + len(self[-1]), tag, lemma, ent_type)
 
+    property text:
+        """The text of every token in the span, joined by single spaces."""
+        def __get__(self):
+            return u' '.join([t.text for t in self])
+
+    property text_with_ws:
+        """The text_with_ws of every token in the span, concatenated."""
+        def __get__(self):
+            return u''.join([t.text_with_ws for t in self])
+
     property root:
         """The first ancestor of the first word of the span that has its head
         outside the span.
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index f3b9aa056..e3e78838f 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -61,6 +61,20 @@ cdef class Token:
             else:
                 return orth
 
+    property text:
+        """The token's orth_ string, without any trailing space."""
+        def __get__(self):
+            return self.orth_
+
+    property text_with_ws:
+        """The token's orth string, plus one space when its spacy flag is set."""
+        def __get__(self):
+            cdef unicode orth = self.vocab.strings[self.c.lex.orth]
+            if self.c.spacy:
+                return orth + u' '
+            else:
+                return orth
+
     property prob:
         def __get__(self):
             return self.c.lex.prob