mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
* Draft a from_orth method for Doc
This commit is contained in:
parent
a9149fdcbd
commit
dfdf19f6a9
|
@ -96,6 +96,20 @@ cdef class Doc:
|
||||||
self.is_parsed = False
|
self.is_parsed = False
|
||||||
self._py_tokens = []
|
self._py_tokens = []
|
||||||
|
|
||||||
|
@classmethod
def from_orth(cls, Vocab vocab, attr_t[:] orths, attr_t[:] spaces):
    """Build a new Doc from parallel arrays of orth IDs and per-token flags.

    For each entry in `orths`, the ID is resolved to its unicode string via
    `vocab.strings`, the matching lexeme is fetched from the vocab, and the
    lexeme is appended to the new document together with `spaces[i]`.

    vocab: The vocabulary used to construct the Doc and to resolve IDs.
    orths: One string-store ID per token.
    spaces: One value per token, passed through to `push_back` as its second
        argument (presumably the trailing-whitespace flag -- confirm against
        `push_back`). Must have at least as many entries as `orths`.

    Returns: The newly constructed Doc.
    Raises: IndexError if `spaces` has fewer entries than `orths`.
    """
    cdef int i
    cdef Py_ssize_t n_tokens = len(orths)
    cdef const LexemeC* lex
    cdef Doc self
    cdef unicode string
    cdef UniStr new_orth_c
    # Validate before allocating anything: indexing spaces[i] past its end
    # would otherwise fail half-way through (or read out of bounds when
    # bounds checking is disabled), leaving a partially built Doc behind.
    # IndexError is used to match what a bounds-checked access would raise.
    if len(spaces) < n_tokens:
        raise IndexError(
            "spaces has %d entries but orths has %d"
            % (len(spaces), n_tokens)
        )
    self = cls(vocab)
    for i in range(n_tokens):
        string = vocab.strings[orths[i]]
        # Wrap the whole string in a UniStr so it can be used as a vocab key.
        slice_unicode(&new_orth_c, string, 0, len(string))
        lex = self.vocab.get(self.mem, &new_orth_c)
        self.push_back(lex, spaces[i])
    return self
|
||||||
|
|
||||||
def __getitem__(self, object i):
|
def __getitem__(self, object i):
|
||||||
"""Get a token.
|
"""Get a token.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user