mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 17:56:30 +03:00
* Remove duplicate get_lex_attr method from doc.pyx
This commit is contained in:
parent
e49c7f1478
commit
6b13e7227c
|
@ -12,6 +12,7 @@ from ..attrs cimport POS, LEMMA, TAG, DEP, HEAD, SPACY, ENT_IOB, ENT_TYPE
|
||||||
from ..parts_of_speech import UNIV_POS_NAMES
|
from ..parts_of_speech import UNIV_POS_NAMES
|
||||||
from ..parts_of_speech cimport CONJ, PUNCT
|
from ..parts_of_speech cimport CONJ, PUNCT
|
||||||
from ..lexeme cimport check_flag
|
from ..lexeme cimport check_flag
|
||||||
|
from ..lexeme cimport get_attr as get_lex_attr
|
||||||
from .spans import Span
|
from .spans import Span
|
||||||
from ..structs cimport UniStr
|
from ..structs cimport UniStr
|
||||||
from .token cimport Token
|
from .token cimport Token
|
||||||
|
@ -48,31 +49,6 @@ cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil:
|
||||||
return get_lex_attr(token.lex, feat_name)
|
return get_lex_attr(token.lex, feat_name)
|
||||||
|
|
||||||
|
|
||||||
cdef attr_t get_lex_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
|
|
||||||
if feat_name < (sizeof(flags_t) * 8):
|
|
||||||
return check_flag(lex, feat_name)
|
|
||||||
elif feat_name == ID:
|
|
||||||
return lex.id
|
|
||||||
elif feat_name == ORTH:
|
|
||||||
return lex.orth
|
|
||||||
elif feat_name == LOWER:
|
|
||||||
return lex.lower
|
|
||||||
elif feat_name == NORM:
|
|
||||||
return lex.norm
|
|
||||||
elif feat_name == SHAPE:
|
|
||||||
return lex.shape
|
|
||||||
elif feat_name == PREFIX:
|
|
||||||
return lex.prefix
|
|
||||||
elif feat_name == SUFFIX:
|
|
||||||
return lex.suffix
|
|
||||||
elif feat_name == LENGTH:
|
|
||||||
return lex.length
|
|
||||||
elif feat_name == CLUSTER:
|
|
||||||
return lex.cluster
|
|
||||||
else:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
cdef class Doc:
|
cdef class Doc:
|
||||||
"""
|
"""
|
||||||
Container class for annotated text. Constructed via English.__call__ or
|
Container class for annotated text. Constructed via English.__call__ or
|
||||||
|
@ -97,15 +73,21 @@ cdef class Doc:
|
||||||
self._py_tokens = []
|
self._py_tokens = []
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_ids(cls, Vocab vocab, ids, spaces):
|
def from_ids(cls, Vocab vocab, orths, spaces):
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef const LexemeC* lex
|
cdef const LexemeC* lex
|
||||||
cdef Doc self = cls(vocab)
|
cdef Doc self = cls(vocab)
|
||||||
cdef bint space = 0
|
cdef bint space = 0
|
||||||
for i in range(len(ids)):
|
cdef attr_t orth
|
||||||
lex = self.vocab.lexemes.at(ids[i])
|
for i in range(len(orths)):
|
||||||
space = spaces[i]
|
orth = orths[i]
|
||||||
self.push_back(lex, space)
|
lex = <LexemeC*>self.vocab._by_orth.get(orth)
|
||||||
|
if lex != NULL:
|
||||||
|
assert lex.orth == orth
|
||||||
|
space = spaces[i]
|
||||||
|
self.push_back(lex, space)
|
||||||
|
else:
|
||||||
|
raise Exception('Lexeme not found: %d' % orth)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def __getitem__(self, object i):
|
def __getitem__(self, object i):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user