* Move POS tag definitions to parts_of_speech.pxd

2025-08-28 07:54:56 +03:00 · 2015-01-25 16:31:07 +11:00 · 2015-01-25 16:31:07 +11:00 · 12b034e3ef
commit 12b034e3ef
parent 7431c133d8
7 changed files with 15 additions and 50 deletions
--- a/spacy/en/pos.pxd
+++ b/spacy/en/pos.pxd
@@ -4,7 +4,7 @@ from cymem.cymem cimport Pool
 from .._ml cimport Model
 from ..strings cimport StringStore
 from ..structs cimport TokenC, LexemeC, Morphology, PosTag
-from ..typedefs cimport univ_tag_t
+from ..parts_of_speech cimport univ_pos_t
 from .lemmatizer import Lemmatizer
@@ -21,5 +21,5 @@ cdef class EnPosTagger:
    cdef readonly int n_tags
    cdef int set_morph(self, const int i, TokenC* tokens) except -1
-    cdef int lemmatize(self, const univ_tag_t pos, const LexemeC* lex) except -1
+    cdef int lemmatize(self, const univ_pos_t pos, const LexemeC* lex) except -1
--- a/spacy/en/pos.pyx
+++ b/spacy/en/pos.pyx
@@ -8,9 +8,9 @@ from libc.string cimport memset
 from cymem.cymem cimport Address
 from thinc.typedefs cimport atom_t, weight_t
-from ..typedefs cimport univ_tag_t
+from ..parts_of_speech cimport univ_pos_t
-from ..typedefs cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT, VERB
+from ..parts_of_speech cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT, VERB
-from ..typedefs cimport X, PUNCT, EOL
+from ..parts_of_speech cimport X, PUNCT, EOL
 from ..typedefs cimport id_t
 from ..structs cimport TokenC, Morphology, LexemeC
 from ..tokens cimport Tokens
@@ -282,7 +282,7 @@ cdef class EnPosTagger:
        tokens[i].lemma = cached.lemma
        tokens[i].morph = cached.morph
-    cdef int lemmatize(self, const univ_tag_t pos, const LexemeC* lex) except -1:
+    cdef int lemmatize(self, const univ_pos_t pos, const LexemeC* lex) except -1:
        if self.lemmatizer is None:
            return lex.orth
        cdef unicode py_string = self.strings[lex.orth]
--- a/spacy/structs.pxd
+++ b/spacy/structs.pxd
@@ -1,6 +1,7 @@
 from libc.stdint cimport uint8_t, uint32_t
-from .typedefs cimport flags_t, attr_t, id_t, hash_t, univ_tag_t
+from .typedefs cimport flags_t, attr_t, id_t, hash_t
+from .parts_of_speech cimport univ_pos_t
 cdef struct LexemeC:
@@ -37,13 +38,13 @@ cdef struct Morphology:
 cdef struct PosTag:
    Morphology morph
    int id
-    univ_tag_t pos
+    univ_pos_t pos
 cdef struct TokenC:
    const LexemeC* lex
    Morphology morph
-    univ_tag_t pos
+    univ_pos_t pos
    int tag
    int idx
    int lemma
--- a/spacy/tokens.pxd
+++ b/spacy/tokens.pxd
@@ -6,7 +6,8 @@ cimport numpy
 from cymem.cymem cimport Pool
 from thinc.typedefs cimport atom_t
-from .typedefs cimport flags_t, attr_id_t, attr_t, univ_tag_t
+from .typedefs cimport flags_t, attr_id_t, attr_t
+from .parts_of_speech cimport univ_pos_t
 from .structs cimport Morphology, TokenC, LexemeC
 from .vocab cimport Vocab
 from .strings cimport StringStore
@@ -66,7 +67,7 @@ cdef class Token:
    cdef readonly float sentiment
    cdef readonly attr_t flags
    cdef readonly attr_t lemma
-    cdef readonly univ_tag_t pos
+    cdef readonly univ_pos_t pos
    cdef readonly attr_t tag
    cdef readonly attr_t dep
    cdef readonly ndarray repvec
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@@ -8,7 +8,7 @@ from .typedefs cimport attr_id_t, attr_t
 from .typedefs cimport LEMMA
 from .typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
 from .typedefs cimport POS, LEMMA
-from .typedefs import UNIV_TAG_NAMES
+from .parts_of_speech import UNIV_POS_NAMES
 from unidecode import unidecode
@@ -325,7 +325,7 @@ cdef class Token:
    property pos_:
        def __get__(self):
-            id_to_string = {id_: string for string, id_ in UNIV_TAG_NAMES.items()}
+            id_to_string = {id_: string for string, id_ in UNIV_POS_NAMES.items()}
            return id_to_string[self.pos]
    property tag_:
--- a/spacy/typedefs.pxd
+++ b/spacy/typedefs.pxd
@@ -2,25 +2,6 @@ from libc.stdint cimport uint16_t, uint32_t, uint64_t, uintptr_t
 from libc.stdint cimport uint8_t
-# Google universal tag set
-cpdef enum univ_tag_t:
-    NO_TAG
-    ADJ
-    ADV
-    ADP
-    CONJ
-    DET
-    NOUN
-    NUM
-    PRON
-    PRT
-    VERB
-    X
-    PUNCT
-    EOL
-    N_UNIV_TAGS
 # Reserve 64 values for flag features
 cpdef enum attr_id_t:
    FLAG0
--- a/spacy/typedefs.pyx
+++ b/spacy/typedefs.pyx
@@ -1,19 +1 @@
 from __future__ import unicode_literals
-UNIV_TAG_NAMES = {
-    "NO_TAG": NO_TAG,
-    "ADJ": ADJ,
-    "ADV": ADV,
-    "ADP": ADP,
-    "CONJ": CONJ,
-    "DET": DET,
-    "NOUN": NOUN,
-    "NUM": NUM,
-    "PRON": PRON,
-    "PRT": PRT,
-    "VERB": VERB,
-    "X": X,
-    "PUNCT": PUNCT,
-    "EOL": EOL
-}