mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
* Move POS tag definitions to parts_of_speech.pxd
This commit is contained in:
parent
7431c133d8
commit
12b034e3ef
|
@ -4,7 +4,7 @@ from cymem.cymem cimport Pool
|
|||
from .._ml cimport Model
|
||||
from ..strings cimport StringStore
|
||||
from ..structs cimport TokenC, LexemeC, Morphology, PosTag
|
||||
from ..typedefs cimport univ_tag_t
|
||||
from ..parts_of_speech cimport univ_pos_t
|
||||
from .lemmatizer import Lemmatizer
|
||||
|
||||
|
||||
|
@ -21,5 +21,5 @@ cdef class EnPosTagger:
|
|||
cdef readonly int n_tags
|
||||
|
||||
cdef int set_morph(self, const int i, TokenC* tokens) except -1
|
||||
cdef int lemmatize(self, const univ_tag_t pos, const LexemeC* lex) except -1
|
||||
cdef int lemmatize(self, const univ_pos_t pos, const LexemeC* lex) except -1
|
||||
|
||||
|
|
|
@ -8,9 +8,9 @@ from libc.string cimport memset
|
|||
from cymem.cymem cimport Address
|
||||
from thinc.typedefs cimport atom_t, weight_t
|
||||
|
||||
from ..typedefs cimport univ_tag_t
|
||||
from ..typedefs cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT, VERB
|
||||
from ..typedefs cimport X, PUNCT, EOL
|
||||
from ..parts_of_speech cimport univ_pos_t
|
||||
from ..parts_of_speech cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT, VERB
|
||||
from ..parts_of_speech cimport X, PUNCT, EOL
|
||||
from ..typedefs cimport id_t
|
||||
from ..structs cimport TokenC, Morphology, LexemeC
|
||||
from ..tokens cimport Tokens
|
||||
|
@ -282,7 +282,7 @@ cdef class EnPosTagger:
|
|||
tokens[i].lemma = cached.lemma
|
||||
tokens[i].morph = cached.morph
|
||||
|
||||
cdef int lemmatize(self, const univ_tag_t pos, const LexemeC* lex) except -1:
|
||||
cdef int lemmatize(self, const univ_pos_t pos, const LexemeC* lex) except -1:
|
||||
if self.lemmatizer is None:
|
||||
return lex.orth
|
||||
cdef unicode py_string = self.strings[lex.orth]
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from libc.stdint cimport uint8_t, uint32_t
|
||||
|
||||
from .typedefs cimport flags_t, attr_t, id_t, hash_t, univ_tag_t
|
||||
from .typedefs cimport flags_t, attr_t, id_t, hash_t
|
||||
from .parts_of_speech cimport univ_pos_t
|
||||
|
||||
|
||||
cdef struct LexemeC:
|
||||
|
@ -37,13 +38,13 @@ cdef struct Morphology:
|
|||
cdef struct PosTag:
|
||||
Morphology morph
|
||||
int id
|
||||
univ_tag_t pos
|
||||
univ_pos_t pos
|
||||
|
||||
|
||||
cdef struct TokenC:
|
||||
const LexemeC* lex
|
||||
Morphology morph
|
||||
univ_tag_t pos
|
||||
univ_pos_t pos
|
||||
int tag
|
||||
int idx
|
||||
int lemma
|
||||
|
|
|
@ -6,7 +6,8 @@ cimport numpy
|
|||
from cymem.cymem cimport Pool
|
||||
from thinc.typedefs cimport atom_t
|
||||
|
||||
from .typedefs cimport flags_t, attr_id_t, attr_t, univ_tag_t
|
||||
from .typedefs cimport flags_t, attr_id_t, attr_t
|
||||
from .parts_of_speech cimport univ_pos_t
|
||||
from .structs cimport Morphology, TokenC, LexemeC
|
||||
from .vocab cimport Vocab
|
||||
from .strings cimport StringStore
|
||||
|
@ -66,7 +67,7 @@ cdef class Token:
|
|||
cdef readonly float sentiment
|
||||
cdef readonly attr_t flags
|
||||
cdef readonly attr_t lemma
|
||||
cdef readonly univ_tag_t pos
|
||||
cdef readonly univ_pos_t pos
|
||||
cdef readonly attr_t tag
|
||||
cdef readonly attr_t dep
|
||||
cdef readonly ndarray repvec
|
||||
|
|
|
@ -8,7 +8,7 @@ from .typedefs cimport attr_id_t, attr_t
|
|||
from .typedefs cimport LEMMA
|
||||
from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
||||
from .typedefs cimport POS, LEMMA
|
||||
from .typedefs import UNIV_TAG_NAMES
|
||||
from .parts_of_speech import UNIV_POS_NAMES
|
||||
|
||||
from unidecode import unidecode
|
||||
|
||||
|
@ -325,7 +325,7 @@ cdef class Token:
|
|||
|
||||
property pos_:
|
||||
def __get__(self):
|
||||
id_to_string = {id_: string for string, id_ in UNIV_TAG_NAMES.items()}
|
||||
id_to_string = {id_: string for string, id_ in UNIV_POS_NAMES.items()}
|
||||
return id_to_string[self.pos]
|
||||
|
||||
property tag_:
|
||||
|
|
|
@ -2,25 +2,6 @@ from libc.stdint cimport uint16_t, uint32_t, uint64_t, uintptr_t
|
|||
from libc.stdint cimport uint8_t
|
||||
|
||||
|
||||
# Google universal tag set
|
||||
cpdef enum univ_tag_t:
|
||||
NO_TAG
|
||||
ADJ
|
||||
ADV
|
||||
ADP
|
||||
CONJ
|
||||
DET
|
||||
NOUN
|
||||
NUM
|
||||
PRON
|
||||
PRT
|
||||
VERB
|
||||
X
|
||||
PUNCT
|
||||
EOL
|
||||
N_UNIV_TAGS
|
||||
|
||||
|
||||
# Reserve 64 values for flag features
|
||||
cpdef enum attr_id_t:
|
||||
FLAG0
|
||||
|
|
|
@ -1,19 +1 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
UNIV_TAG_NAMES = {
|
||||
"NO_TAG": NO_TAG,
|
||||
"ADJ": ADJ,
|
||||
"ADV": ADV,
|
||||
"ADP": ADP,
|
||||
"CONJ": CONJ,
|
||||
"DET": DET,
|
||||
"NOUN": NOUN,
|
||||
"NUM": NUM,
|
||||
"PRON": PRON,
|
||||
"PRT": PRT,
|
||||
"VERB": VERB,
|
||||
"X": X,
|
||||
"PUNCT": PUNCT,
|
||||
"EOL": EOL
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user