mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-01 03:33:12 +03:00
* Break up tokens.pyx into tokens/doc.pyx, tokens/token.pyx, tokens/spans.pyx
This commit is contained in:
parent
3ea8756c24
commit
6eef0bf9ab
|
@ -14,7 +14,7 @@ from ..parts_of_speech cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON
|
||||||
from ..parts_of_speech cimport PRT, VERB, X, PUNCT, EOL, SPACE
|
from ..parts_of_speech cimport PRT, VERB, X, PUNCT, EOL, SPACE
|
||||||
from ..typedefs cimport id_t
|
from ..typedefs cimport id_t
|
||||||
from ..structs cimport TokenC, Morphology, LexemeC
|
from ..structs cimport TokenC, Morphology, LexemeC
|
||||||
from ..tokens cimport Doc
|
from ..tokens.doc cimport Doc
|
||||||
from ..morphology cimport set_morph_from_dict
|
from ..morphology cimport set_morph_from_dict
|
||||||
from .._ml cimport arg_max
|
from .._ml cimport arg_max
|
||||||
|
|
||||||
|
|
|
@ -1,14 +0,0 @@
|
||||||
from .tokens cimport Doc
|
|
||||||
from .typedefs cimport flags_t, attr_id_t, attr_t
|
|
||||||
from .parts_of_speech cimport univ_pos_t
|
|
||||||
from .structs cimport Morphology, TokenC, LexemeC
|
|
||||||
from .vocab cimport Vocab
|
|
||||||
from .strings cimport StringStore
|
|
||||||
|
|
||||||
|
|
||||||
cdef class Span:
|
|
||||||
cdef readonly Doc _seq
|
|
||||||
cdef public int i
|
|
||||||
cdef public int start
|
|
||||||
cdef public int end
|
|
||||||
cdef readonly int label
|
|
|
@ -12,7 +12,7 @@ from libc.string cimport memset
|
||||||
|
|
||||||
from itertools import combinations
|
from itertools import combinations
|
||||||
|
|
||||||
from ..tokens cimport TokenC
|
from ..structs cimport TokenC
|
||||||
|
|
||||||
from .stateclass cimport StateClass
|
from .stateclass cimport StateClass
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,8 @@ from .._ml cimport Model
|
||||||
|
|
||||||
from .arc_eager cimport TransitionSystem
|
from .arc_eager cimport TransitionSystem
|
||||||
|
|
||||||
from ..tokens cimport Doc, TokenC
|
from ..tokens.doc cimport Doc
|
||||||
|
from ..structs cimport TokenC
|
||||||
|
|
||||||
|
|
||||||
cdef class Parser:
|
cdef class Parser:
|
||||||
|
|
|
@ -32,7 +32,9 @@ from thinc.learner cimport LinearModel
|
||||||
from thinc.search cimport Beam
|
from thinc.search cimport Beam
|
||||||
from thinc.search cimport MaxViolation
|
from thinc.search cimport MaxViolation
|
||||||
|
|
||||||
from ..tokens cimport Doc, TokenC
|
from ..structs cimport TokenC
|
||||||
|
|
||||||
|
from ..tokens.doc cimport Doc
|
||||||
from ..strings cimport StringStore
|
from ..strings cimport StringStore
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ from cymem.cymem cimport Pool
|
||||||
from .typedefs cimport hash_t
|
from .typedefs cimport hash_t
|
||||||
from .structs cimport LexemeC, TokenC, Morphology, UniStr
|
from .structs cimport LexemeC, TokenC, Morphology, UniStr
|
||||||
from .strings cimport StringStore
|
from .strings cimport StringStore
|
||||||
from .tokens cimport Doc
|
from .tokens.doc cimport Doc
|
||||||
from .vocab cimport Vocab, _Cached
|
from .vocab cimport Vocab, _Cached
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,7 @@ from .morphology cimport set_morph_from_dict
|
||||||
|
|
||||||
from . import util
|
from . import util
|
||||||
from .util import read_lang_data
|
from .util import read_lang_data
|
||||||
from .tokens import Doc
|
from .tokens.doc cimport Doc
|
||||||
|
|
||||||
|
|
||||||
cdef class Tokenizer:
|
cdef class Tokenizer:
|
||||||
|
|
|
@ -1,5 +0,0 @@
|
||||||
from .doc cimport Doc
|
|
||||||
from .token cimport Token
|
|
||||||
from .spans cimport Spans
|
|
||||||
|
|
||||||
__all__ = [Doc, Token, Spans]
|
|
|
@ -1,5 +1,5 @@
|
||||||
from .doc import Doc
|
from .doc import Doc
|
||||||
from .token import Token
|
from .token import Token
|
||||||
from .spans import Spans
|
from .spans import Span
|
||||||
|
|
||||||
__all__ = [Doc, Token, Spans]
|
__all__ = [Doc, Token, Span]
|
||||||
|
|
|
@ -31,5 +31,3 @@ cdef class Doc:
|
||||||
cpdef np.ndarray to_array(self, object features)
|
cpdef np.ndarray to_array(self, object features)
|
||||||
|
|
||||||
cdef int set_parse(self, const TokenC* parsed) except -1
|
cdef int set_parse(self, const TokenC* parsed) except -1
|
||||||
|
|
||||||
|
|
||||||
|
|
9
spacy/tokens/spans.pxd
Normal file
9
spacy/tokens/spans.pxd
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
from .doc cimport Doc
|
||||||
|
|
||||||
|
|
||||||
|
cdef class Span:
|
||||||
|
cdef readonly Doc _seq
|
||||||
|
cdef public int i
|
||||||
|
cdef public int start
|
||||||
|
cdef public int end
|
||||||
|
cdef readonly int label
|
|
@ -1,6 +1,10 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
from ..structs cimport Morphology, TokenC, LexemeC
|
||||||
|
from ..typedefs cimport flags_t, attr_id_t, attr_t
|
||||||
|
from ..parts_of_speech cimport univ_pos_t
|
||||||
|
|
||||||
|
|
||||||
cdef class Span:
|
cdef class Span:
|
||||||
"""A slice from a Doc object."""
|
"""A slice from a Doc object."""
|
|
@ -3,6 +3,18 @@ from cpython.mem cimport PyMem_Malloc, PyMem_Free
|
||||||
from ..lexeme cimport check_flag
|
from ..lexeme cimport check_flag
|
||||||
# Compiler crashes on memory view coercion without this. Should report bug.
|
# Compiler crashes on memory view coercion without this. Should report bug.
|
||||||
from cython.view cimport array as cvarray
|
from cython.view cimport array as cvarray
|
||||||
|
cimport numpy as np
|
||||||
|
np.import_array()
|
||||||
|
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
|
||||||
|
from ..parts_of_speech import UNIV_POS_NAMES
|
||||||
|
|
||||||
|
from ..typedefs cimport LEMMA
|
||||||
|
from ..typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
||||||
|
from ..typedefs cimport POS, LEMMA, TAG, DEP
|
||||||
|
from ..parts_of_speech cimport CONJ, PUNCT
|
||||||
|
|
||||||
|
|
||||||
cdef class Token:
|
cdef class Token:
|
||||||
|
@ -279,3 +291,6 @@ cdef class Token:
|
||||||
property dep_:
|
property dep_:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return self.vocab.strings[self.c.dep]
|
return self.vocab.strings[self.c.dep]
|
||||||
|
|
||||||
|
|
||||||
|
_pos_id_to_string = {id_: string for string, id_ in UNIV_POS_NAMES.items()}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user