mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
* Break up tokens.pyx into tokens/doc.pyx, tokens/token.pyx, tokens/spans.pyx
This commit is contained in:
parent
3ea8756c24
commit
6eef0bf9ab
|
@ -14,7 +14,7 @@ from ..parts_of_speech cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON
|
|||
from ..parts_of_speech cimport PRT, VERB, X, PUNCT, EOL, SPACE
|
||||
from ..typedefs cimport id_t
|
||||
from ..structs cimport TokenC, Morphology, LexemeC
|
||||
from ..tokens cimport Doc
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..morphology cimport set_morph_from_dict
|
||||
from .._ml cimport arg_max
|
||||
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
from .tokens cimport Doc
|
||||
from .typedefs cimport flags_t, attr_id_t, attr_t
|
||||
from .parts_of_speech cimport univ_pos_t
|
||||
from .structs cimport Morphology, TokenC, LexemeC
|
||||
from .vocab cimport Vocab
|
||||
from .strings cimport StringStore
|
||||
|
||||
|
||||
cdef class Span:
|
||||
cdef readonly Doc _seq
|
||||
cdef public int i
|
||||
cdef public int start
|
||||
cdef public int end
|
||||
cdef readonly int label
|
|
@ -12,7 +12,7 @@ from libc.string cimport memset
|
|||
|
||||
from itertools import combinations
|
||||
|
||||
from ..tokens cimport TokenC
|
||||
from ..structs cimport TokenC
|
||||
|
||||
from .stateclass cimport StateClass
|
||||
|
||||
|
|
|
@ -4,7 +4,8 @@ from .._ml cimport Model
|
|||
|
||||
from .arc_eager cimport TransitionSystem
|
||||
|
||||
from ..tokens cimport Doc, TokenC
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..structs cimport TokenC
|
||||
|
||||
|
||||
cdef class Parser:
|
||||
|
|
|
@ -32,7 +32,9 @@ from thinc.learner cimport LinearModel
|
|||
from thinc.search cimport Beam
|
||||
from thinc.search cimport MaxViolation
|
||||
|
||||
from ..tokens cimport Doc, TokenC
|
||||
from ..structs cimport TokenC
|
||||
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..strings cimport StringStore
|
||||
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ from cymem.cymem cimport Pool
|
|||
from .typedefs cimport hash_t
|
||||
from .structs cimport LexemeC, TokenC, Morphology, UniStr
|
||||
from .strings cimport StringStore
|
||||
from .tokens cimport Doc
|
||||
from .tokens.doc cimport Doc
|
||||
from .vocab cimport Vocab, _Cached
|
||||
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ from .morphology cimport set_morph_from_dict
|
|||
|
||||
from . import util
|
||||
from .util import read_lang_data
|
||||
from .tokens import Doc
|
||||
from .tokens.doc cimport Doc
|
||||
|
||||
|
||||
cdef class Tokenizer:
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
from .doc cimport Doc
|
||||
from .token cimport Token
|
||||
from .spans cimport Spans
|
||||
|
||||
__all__ = [Doc, Token, Spans]
|
|
@ -1,5 +1,5 @@
|
|||
from .doc import Doc
|
||||
from .token import Token
|
||||
from .spans import Spans
|
||||
from .spans import Span
|
||||
|
||||
__all__ = [Doc, Token, Spans]
|
||||
__all__ = [Doc, Token, Span]
|
||||
|
|
|
@ -31,5 +31,3 @@ cdef class Doc:
|
|||
cpdef np.ndarray to_array(self, object features)
|
||||
|
||||
cdef int set_parse(self, const TokenC* parsed) except -1
|
||||
|
||||
|
||||
|
|
9
spacy/tokens/spans.pxd
Normal file
9
spacy/tokens/spans.pxd
Normal file
|
@ -0,0 +1,9 @@
|
|||
from .doc cimport Doc
|
||||
|
||||
|
||||
cdef class Span:
|
||||
cdef readonly Doc _seq
|
||||
cdef public int i
|
||||
cdef public int start
|
||||
cdef public int end
|
||||
cdef readonly int label
|
|
@ -1,6 +1,10 @@
|
|||
from __future__ import unicode_literals
|
||||
from collections import defaultdict
|
||||
|
||||
from ..structs cimport Morphology, TokenC, LexemeC
|
||||
from ..typedefs cimport flags_t, attr_id_t, attr_t
|
||||
from ..parts_of_speech cimport univ_pos_t
|
||||
|
||||
|
||||
cdef class Span:
|
||||
"""A slice from a Doc object."""
|
|
@ -3,6 +3,18 @@ from cpython.mem cimport PyMem_Malloc, PyMem_Free
|
|||
from ..lexeme cimport check_flag
|
||||
# Compiler crashes on memory view coercion without this. Should report bug.
|
||||
from cython.view cimport array as cvarray
|
||||
cimport numpy as np
|
||||
np.import_array()
|
||||
|
||||
import numpy
|
||||
|
||||
|
||||
from ..parts_of_speech import UNIV_POS_NAMES
|
||||
|
||||
from ..typedefs cimport LEMMA
|
||||
from ..typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
|
||||
from ..typedefs cimport POS, LEMMA, TAG, DEP
|
||||
from ..parts_of_speech cimport CONJ, PUNCT
|
||||
|
||||
|
||||
cdef class Token:
|
||||
|
@ -279,3 +291,6 @@ cdef class Token:
|
|||
property dep_:
|
||||
def __get__(self):
|
||||
return self.vocab.strings[self.c.dep]
|
||||
|
||||
|
||||
_pos_id_to_string = {id_: string for string, id_ in UNIV_POS_NAMES.items()}
|
||||
|
|
Loading…
Reference in New Issue
Block a user