mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Add getters_for_spans and getters_for_tokens attributes to Doc. Fix docstring
This commit is contained in:
parent
ae11ea8240
commit
62230dd13a
|
@ -89,11 +89,14 @@ cdef class Doc:
|
||||||
A Vocabulary object, which must match any models you want to
|
A Vocabulary object, which must match any models you want to
|
||||||
use (e.g. tokenizer, parser, entity recognizer).
|
use (e.g. tokenizer, parser, entity recognizer).
|
||||||
|
|
||||||
orths_and_spaces:
|
words:
|
||||||
A list of tokens in the document as a sequence of
|
A list of unicode strings to add to the document as words. If None,
|
||||||
`(orth_id, has_space)` tuples, where `orth_id` is an
|
defaults to an empty list.
|
||||||
integer and `has_space` is a boolean, indicating whether the
|
|
||||||
token has a trailing space.
|
spaces:
|
||||||
|
A list of boolean values, of the same length as words. True
|
||||||
|
means that the word is followed by a space, False means it is not.
|
||||||
|
If None, defaults to [True]*len(words).
|
||||||
'''
|
'''
|
||||||
self.vocab = vocab
|
self.vocab = vocab
|
||||||
size = 20
|
size = 20
|
||||||
|
@ -112,6 +115,8 @@ cdef class Doc:
|
||||||
self.length = 0
|
self.length = 0
|
||||||
self.is_tagged = False
|
self.is_tagged = False
|
||||||
self.is_parsed = False
|
self.is_parsed = False
|
||||||
|
self.getters_for_tokens = {}
|
||||||
|
self.getters_for_spans = {}
|
||||||
self._py_tokens = []
|
self._py_tokens = []
|
||||||
self._vector = None
|
self._vector = None
|
||||||
self.noun_chunks_iterator = CHUNKERS.get(self.vocab.lang)
|
self.noun_chunks_iterator = CHUNKERS.get(self.vocab.lang)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user