* Break up tokens.pyx into tokens/doc.pyx, tokens/token.pyx, tokens/spans.pyx

This commit is contained in:
Matthew Honnibal 2015-07-13 20:20:58 +02:00
parent 3ea8756c24
commit 6eef0bf9ab
13 changed files with 39 additions and 29 deletions

View File

@ -14,7 +14,7 @@ from ..parts_of_speech cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON
from ..parts_of_speech cimport PRT, VERB, X, PUNCT, EOL, SPACE from ..parts_of_speech cimport PRT, VERB, X, PUNCT, EOL, SPACE
from ..typedefs cimport id_t from ..typedefs cimport id_t
from ..structs cimport TokenC, Morphology, LexemeC from ..structs cimport TokenC, Morphology, LexemeC
from ..tokens cimport Doc from ..tokens.doc cimport Doc
from ..morphology cimport set_morph_from_dict from ..morphology cimport set_morph_from_dict
from .._ml cimport arg_max from .._ml cimport arg_max

View File

@ -1,14 +0,0 @@
from .tokens cimport Doc
from .typedefs cimport flags_t, attr_id_t, attr_t
from .parts_of_speech cimport univ_pos_t
from .structs cimport Morphology, TokenC, LexemeC
from .vocab cimport Vocab
from .strings cimport StringStore
cdef class Span:
    # C-level attribute declarations for the Span extension type.
    # `readonly` attributes can be read from Python but not assigned;
    # `public` attributes can be both read and assigned from Python.

    # The Doc object this span belongs to.
    cdef readonly Doc _seq

    # NOTE(review): role of `i` is not visible in this declaration file --
    # confirm against the implementation before relying on it.
    cdef public int i

    # Span boundaries; presumably token offsets into `_seq` -- TODO confirm
    # whether `end` is exclusive.
    cdef public int start
    cdef public int end

    # Integer label for the span; presumably an id that resolves to a
    # string elsewhere -- verify against the implementation.
    cdef readonly int label

View File

@ -12,7 +12,7 @@ from libc.string cimport memset
from itertools import combinations from itertools import combinations
from ..tokens cimport TokenC from ..structs cimport TokenC
from .stateclass cimport StateClass from .stateclass cimport StateClass

View File

@ -4,7 +4,8 @@ from .._ml cimport Model
from .arc_eager cimport TransitionSystem from .arc_eager cimport TransitionSystem
from ..tokens cimport Doc, TokenC from ..tokens.doc cimport Doc
from ..structs cimport TokenC
cdef class Parser: cdef class Parser:

View File

@ -32,7 +32,9 @@ from thinc.learner cimport LinearModel
from thinc.search cimport Beam from thinc.search cimport Beam
from thinc.search cimport MaxViolation from thinc.search cimport MaxViolation
from ..tokens cimport Doc, TokenC from ..structs cimport TokenC
from ..tokens.doc cimport Doc
from ..strings cimport StringStore from ..strings cimport StringStore

View File

@ -8,7 +8,7 @@ from cymem.cymem cimport Pool
from .typedefs cimport hash_t from .typedefs cimport hash_t
from .structs cimport LexemeC, TokenC, Morphology, UniStr from .structs cimport LexemeC, TokenC, Morphology, UniStr
from .strings cimport StringStore from .strings cimport StringStore
from .tokens cimport Doc from .tokens.doc cimport Doc
from .vocab cimport Vocab, _Cached from .vocab cimport Vocab, _Cached

View File

@ -16,7 +16,7 @@ from .morphology cimport set_morph_from_dict
from . import util from . import util
from .util import read_lang_data from .util import read_lang_data
from .tokens import Doc from .tokens.doc cimport Doc
cdef class Tokenizer: cdef class Tokenizer:

View File

@ -1,5 +0,0 @@
from .doc cimport Doc
from .token cimport Token
from .spans cimport Spans
__all__ = [Doc, Token, Spans]

View File

@ -1,5 +1,5 @@
from .doc import Doc from .doc import Doc
from .token import Token from .token import Token
from .spans import Spans from .spans import Span
__all__ = [Doc, Token, Spans] __all__ = [Doc, Token, Span]

View File

@ -31,5 +31,3 @@ cdef class Doc:
cpdef np.ndarray to_array(self, object features) cpdef np.ndarray to_array(self, object features)
cdef int set_parse(self, const TokenC* parsed) except -1 cdef int set_parse(self, const TokenC* parsed) except -1

9
spacy/tokens/spans.pxd Normal file
View File

@ -0,0 +1,9 @@
from .doc cimport Doc
cdef class Span:
    # C-level attribute declarations for the Span extension type.
    # `readonly` attributes can be read from Python but not assigned;
    # `public` attributes can be both read and assigned from Python.

    # The Doc object this span belongs to.
    cdef readonly Doc _seq

    # NOTE(review): role of `i` is not visible in this declaration file --
    # confirm against the implementation before relying on it.
    cdef public int i

    # Span boundaries; presumably token offsets into `_seq` -- TODO confirm
    # whether `end` is exclusive.
    cdef public int start
    cdef public int end

    # Integer label for the span; presumably an id that resolves to a
    # string elsewhere -- verify against the implementation.
    cdef readonly int label

View File

@ -1,6 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from collections import defaultdict from collections import defaultdict
from ..structs cimport Morphology, TokenC, LexemeC
from ..typedefs cimport flags_t, attr_id_t, attr_t
from ..parts_of_speech cimport univ_pos_t
cdef class Span: cdef class Span:
"""A slice from a Doc object.""" """A slice from a Doc object."""

View File

@ -3,6 +3,18 @@ from cpython.mem cimport PyMem_Malloc, PyMem_Free
from ..lexeme cimport check_flag from ..lexeme cimport check_flag
# Compiler crashes on memory view coercion without this. Should report bug. # Compiler crashes on memory view coercion without this. Should report bug.
from cython.view cimport array as cvarray from cython.view cimport array as cvarray
cimport numpy as np
np.import_array()
import numpy
from ..parts_of_speech import UNIV_POS_NAMES
from ..typedefs cimport LEMMA
from ..typedefs cimport ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER
from ..typedefs cimport POS, LEMMA, TAG, DEP
from ..parts_of_speech cimport CONJ, PUNCT
cdef class Token: cdef class Token:
@ -279,3 +291,6 @@ cdef class Token:
property dep_: property dep_:
def __get__(self): def __get__(self):
return self.vocab.strings[self.c.dep] return self.vocab.strings[self.c.dep]
# Reverse lookup table: inverts UNIV_POS_NAMES (name -> id) so that a
# part-of-speech id can be mapped back to its string name.
_pos_id_to_string = {
    pos_id: pos_name
    for pos_name, pos_id in UNIV_POS_NAMES.items()
}