mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Merge branch 'develop' into refactor/simplify-warnings
This commit is contained in:
commit
b0cfab317f
|
@ -1,9 +1,10 @@
|
|||
from cymem.cymem cimport Pool
|
||||
|
||||
from .tokens import Doc
|
||||
from .typedefs cimport attr_t
|
||||
from .syntax.transition_system cimport Transition
|
||||
|
||||
from .tokens import Doc
|
||||
|
||||
|
||||
cdef struct GoldParseC:
|
||||
int* tags
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
"""Knowledge-base for entity or concept linking."""
|
||||
from cymem.cymem cimport Pool
|
||||
from preshed.maps cimport PreshMap
|
||||
|
||||
from libcpp.vector cimport vector
|
||||
from libc.stdint cimport int32_t, int64_t
|
||||
from libc.stdio cimport FILE
|
||||
|
||||
from .vocab cimport Vocab
|
||||
from .typedefs cimport hash_t
|
||||
|
||||
from .structs cimport KBEntryC, AliasC
|
||||
|
||||
|
||||
ctypedef vector[KBEntryC] entry_vec
|
||||
ctypedef vector[AliasC] alias_vec
|
||||
ctypedef vector[float] float_vec
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=True
|
||||
# cython: infer_types=True, profile=True
|
||||
from cymem.cymem cimport Pool
|
||||
from preshed.maps cimport PreshMap
|
||||
from cpython.exc cimport PyErr_SetFromErrno
|
||||
|
|
|
@ -1,13 +1,12 @@
|
|||
from numpy cimport ndarray
|
||||
|
||||
from .typedefs cimport attr_t, hash_t, flags_t, len_t, tag_t
|
||||
from .attrs cimport attr_id_t
|
||||
from .attrs cimport ID, ORTH, LOWER, NORM, SHAPE, PREFIX, SUFFIX, LENGTH, CLUSTER, LANG
|
||||
|
||||
from .structs cimport LexemeC, SerializedLexemeC
|
||||
from .strings cimport StringStore
|
||||
from .vocab cimport Vocab
|
||||
|
||||
from numpy cimport ndarray
|
||||
|
||||
|
||||
cdef LexemeC EMPTY_LEXEME
|
||||
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=True
|
||||
# cython: infer_types=True, profile=True
|
||||
from cymem.cymem cimport Pool
|
||||
from preshed.maps cimport PreshMap
|
||||
from libcpp cimport bool
|
||||
|
||||
import numpy
|
||||
|
||||
from .matcher cimport Matcher
|
||||
from ..vocab cimport Vocab
|
||||
|
@ -10,8 +12,6 @@ from ..tokens.doc cimport Doc
|
|||
from .matcher import unpickle_matcher
|
||||
from ..errors import Errors
|
||||
|
||||
from libcpp cimport bool
|
||||
import numpy
|
||||
|
||||
DELIMITER = "||"
|
||||
INDEX_HEAD = 1
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=True
|
||||
# cython: infer_types=True, profile=True
|
||||
from libcpp.vector cimport vector
|
||||
from libc.stdint cimport int32_t
|
||||
from cymem.cymem cimport Pool
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
from libcpp.vector cimport vector
|
||||
|
||||
from cymem.cymem cimport Pool
|
||||
from preshed.maps cimport key_t, MapStruct
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=True
|
||||
# cython: infer_types=True, profile=True
|
||||
from libc.stdint cimport uintptr_t
|
||||
from preshed.maps cimport map_init, map_set, map_get, map_clear, map_iter
|
||||
|
||||
|
|
|
@ -8,14 +8,14 @@ from .structs cimport TokenC, MorphAnalysisC
|
|||
from .strings cimport StringStore
|
||||
from .typedefs cimport hash_t, attr_t, flags_t
|
||||
from .parts_of_speech cimport univ_pos_t
|
||||
|
||||
from . cimport symbols
|
||||
|
||||
|
||||
cdef class Morphology:
|
||||
cdef readonly Pool mem
|
||||
cdef readonly StringStore strings
|
||||
cdef PreshMap tags # Keyed by hash, value is pointer to tag
|
||||
|
||||
|
||||
cdef public object lemmatizer
|
||||
cdef readonly object tag_map
|
||||
cdef readonly object tag_names
|
||||
|
@ -26,7 +26,7 @@ cdef class Morphology:
|
|||
|
||||
cdef MorphAnalysisC create_morph_tag(self, field_feature_pairs) except *
|
||||
cdef int insert(self, MorphAnalysisC tag) except -1
|
||||
|
||||
|
||||
cdef int assign_untagged(self, TokenC* token) except -1
|
||||
cdef int assign_tag(self, TokenC* token, tag) except -1
|
||||
cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1
|
||||
|
|
|
@ -1,19 +1,21 @@
|
|||
# cython: infer_types
|
||||
from libc.string cimport memset
|
||||
|
||||
import srsly
|
||||
from collections import Counter
|
||||
import numpy
|
||||
import warnings
|
||||
|
||||
from .strings import get_string_id
|
||||
from . import symbols
|
||||
from .attrs cimport POS, IS_SPACE
|
||||
from .attrs import LEMMA, intify_attrs
|
||||
from .parts_of_speech cimport SPACE
|
||||
from .parts_of_speech import IDS as POS_IDS
|
||||
from .lexeme cimport Lexeme
|
||||
|
||||
from .strings import get_string_id
|
||||
from .attrs import LEMMA, intify_attrs
|
||||
from .parts_of_speech import IDS as POS_IDS
|
||||
from .errors import Errors, Warnings
|
||||
from .util import ensure_path
|
||||
from . import symbols
|
||||
|
||||
|
||||
def _normalize_props(props):
|
||||
|
|
|
@ -1,20 +1,20 @@
|
|||
from collections import defaultdict
|
||||
|
||||
import numpy
|
||||
cimport numpy as np
|
||||
|
||||
import numpy
|
||||
from collections import defaultdict
|
||||
from thinc.api import chain, list2array, to_categorical, get_array_module
|
||||
from thinc.util import copy_array
|
||||
|
||||
from .. import util
|
||||
from .pipes import Pipe
|
||||
from ..language import component
|
||||
from ..util import link_vectors_to_models, create_default_optimizer
|
||||
from ..errors import Errors, TempErrors
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..vocab cimport Vocab
|
||||
from ..morphology cimport Morphology
|
||||
|
||||
from .. import util
|
||||
from ..language import component
|
||||
from ..util import link_vectors_to_models, create_default_optimizer
|
||||
from ..errors import Errors, TempErrors
|
||||
from .pipes import Pipe
|
||||
|
||||
|
||||
@component("morphologizer", assigns=["token.morph", "token.pos"])
|
||||
class Morphologizer(Pipe):
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=True
|
||||
# cython: infer_types=True, profile=True
|
||||
import numpy
|
||||
import srsly
|
||||
import random
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
from libc.stdint cimport int64_t
|
||||
from libcpp.vector cimport vector
|
||||
from libcpp.set cimport set
|
||||
|
||||
from cymem.cymem cimport Pool
|
||||
from preshed.maps cimport PreshMap
|
||||
from murmurhash.mrmr cimport hash64
|
||||
|
|
|
@ -4,11 +4,13 @@ from libc.string cimport memcpy
|
|||
from libcpp.set cimport set
|
||||
from libc.stdint cimport uint32_t
|
||||
from murmurhash.mrmr cimport hash64, hash32
|
||||
|
||||
import srsly
|
||||
|
||||
from .typedefs cimport hash_t
|
||||
|
||||
from .symbols import IDS as SYMBOLS_BY_STR
|
||||
from .symbols import NAMES as SYMBOLS_BY_INT
|
||||
from .typedefs cimport hash_t
|
||||
from .errors import Errors
|
||||
from . import util
|
||||
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
from libc.stdint cimport uint8_t, uint32_t, int32_t, uint64_t
|
||||
|
||||
from .typedefs cimport flags_t, attr_t, hash_t
|
||||
from .parts_of_speech cimport univ_pos_t
|
||||
|
||||
from libcpp.vector cimport vector
|
||||
from libc.stdint cimport int32_t, int64_t
|
||||
|
||||
from .typedefs cimport flags_t, attr_t, hash_t
|
||||
from .parts_of_speech cimport univ_pos_t
|
||||
|
||||
|
||||
cdef struct LexemeC:
|
||||
|
|
|
@ -1,18 +1,19 @@
|
|||
# cython: infer_types=True
|
||||
# cython: profile=True
|
||||
# cython: infer_types=True, profile=True
|
||||
cimport numpy as np
|
||||
import numpy
|
||||
from cpython.ref cimport PyObject, Py_XDECREF
|
||||
from thinc.extra.search cimport Beam
|
||||
from thinc.extra.search import MaxViolation
|
||||
from thinc.extra.search cimport MaxViolation
|
||||
|
||||
from thinc.extra.search import MaxViolation
|
||||
import numpy
|
||||
|
||||
from ..typedefs cimport hash_t, class_t
|
||||
from .transition_system cimport TransitionSystem, Transition
|
||||
from ..gold cimport GoldParse
|
||||
from ..errors import Errors
|
||||
from .stateclass cimport StateC, StateClass
|
||||
|
||||
from ..errors import Errors
|
||||
|
||||
|
||||
# These are passed as callbacks to thinc.search.Beam
|
||||
cdef int transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1:
|
||||
|
@ -326,5 +327,3 @@ def cleanup_beam(Beam beam):
|
|||
seen.add(addr)
|
||||
else:
|
||||
raise ValueError(Errors.E023.format(addr=addr, i=i))
|
||||
|
||||
|
||||
|
|
|
@ -1,9 +1,5 @@
|
|||
# cython: infer_types=True
|
||||
# cython: cdivision=True
|
||||
# cython: boundscheck=False
|
||||
import numpy
|
||||
# cython: infer_types=True, cdivision=True, boundscheck=False
|
||||
cimport cython.parallel
|
||||
import numpy.random
|
||||
cimport numpy as np
|
||||
from libc.math cimport exp
|
||||
from libcpp.vector cimport vector
|
||||
|
@ -11,21 +7,25 @@ from libc.string cimport memset, memcpy
|
|||
from libc.stdlib cimport calloc, free, realloc
|
||||
from cymem.cymem cimport Pool
|
||||
from thinc.extra.search cimport Beam
|
||||
from thinc.api import Linear, Model, CupyOps, NumpyOps, use_ops
|
||||
from thinc.backends.linalg cimport Vec, VecVec
|
||||
cimport blis.cy
|
||||
|
||||
import numpy
|
||||
import numpy.random
|
||||
from thinc.api import Linear, Model, CupyOps, NumpyOps, use_ops
|
||||
|
||||
from ..typedefs cimport weight_t, class_t, hash_t
|
||||
from ..compat import copy_array
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..gold cimport GoldParse
|
||||
from ..errors import Errors, TempErrors
|
||||
from .. import util
|
||||
from .stateclass cimport StateClass
|
||||
from .transition_system cimport Transition
|
||||
|
||||
from ..compat import copy_array
|
||||
from ..errors import Errors, TempErrors
|
||||
from ..util import link_vectors_to_models, create_default_optimizer
|
||||
from .. import util
|
||||
from . import _beam_utils
|
||||
from . import nonproj
|
||||
from ..util import link_vectors_to_models, create_default_optimizer
|
||||
|
||||
|
||||
cdef WeightsC get_c_weights(model) except *:
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
from libc.string cimport memcpy, memset, memmove
|
||||
from libc.stdlib cimport malloc, calloc, free
|
||||
from libc.stdint cimport uint32_t, uint64_t
|
||||
|
||||
from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
|
||||
|
||||
from murmurhash.mrmr cimport hash64
|
||||
|
||||
from ..vocab cimport EMPTY_LEXEME
|
||||
|
|
|
@ -1,10 +1,7 @@
|
|||
from cymem.cymem cimport Pool
|
||||
|
||||
from ..typedefs cimport weight_t
|
||||
|
||||
from .stateclass cimport StateClass
|
||||
from ..typedefs cimport attr_t
|
||||
|
||||
from ..typedefs cimport weight_t, attr_t
|
||||
from .transition_system cimport TransitionSystem, Transition
|
||||
from ..gold cimport GoldParseC
|
||||
|
||||
|
@ -15,4 +12,3 @@ cdef class ArcEager(TransitionSystem):
|
|||
|
||||
cdef weight_t push_cost(StateClass stcls, const GoldParseC* gold, int target) nogil
|
||||
cdef weight_t arc_cost(StateClass stcls, const GoldParseC* gold, int head, int child) nogil
|
||||
|
||||
|
|
|
@ -1,23 +1,24 @@
|
|||
# cython: profile=True
|
||||
# cython: cdivision=True
|
||||
# cython: infer_types=True
|
||||
# cython: profile=True, cdivision=True, infer_types=True
|
||||
from cpython.ref cimport Py_INCREF
|
||||
from cymem.cymem cimport Pool
|
||||
from collections import defaultdict, Counter
|
||||
from thinc.extra.search cimport Beam
|
||||
|
||||
from collections import defaultdict, Counter
|
||||
import json
|
||||
|
||||
from .nonproj import is_nonproj_tree
|
||||
from ..typedefs cimport hash_t, attr_t
|
||||
from ..strings cimport hash_string
|
||||
from .stateclass cimport StateClass
|
||||
from ._state cimport StateC
|
||||
from . import nonproj
|
||||
from .transition_system cimport move_cost_func_t, label_cost_func_t
|
||||
from ..gold cimport GoldParse, GoldParseC
|
||||
from ..structs cimport TokenC
|
||||
from ..errors import Errors
|
||||
from ..tokens.doc cimport Doc, set_children_from_heads
|
||||
from .stateclass cimport StateClass
|
||||
from ._state cimport StateC
|
||||
from .transition_system cimport move_cost_func_t, label_cost_func_t
|
||||
|
||||
from ..errors import Errors
|
||||
from .nonproj import is_nonproj_tree
|
||||
from . import nonproj
|
||||
|
||||
|
||||
# Calculate cost as gold/not gold. We don't use scalar value anyway.
|
||||
cdef int BINARY_COSTS = 1
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from thinc.extra.search cimport Beam
|
||||
|
||||
from collections import Counter
|
||||
|
||||
from ..typedefs cimport weight_t
|
||||
|
@ -9,6 +10,7 @@ from .transition_system cimport do_func_t
|
|||
from ..gold cimport GoldParseC, GoldParse
|
||||
from ..lexeme cimport Lexeme
|
||||
from ..attrs cimport IS_SPACE
|
||||
|
||||
from ..errors import Errors
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
# cython: infer_types=True
|
||||
# cython: cdivision=True
|
||||
# cython: boundscheck=False
|
||||
# cython: infer_types=True, cdivision=True, boundscheck=False
|
||||
cimport cython.parallel
|
||||
cimport numpy as np
|
||||
from cpython.ref cimport PyObject, Py_XDECREF
|
||||
|
@ -21,23 +19,24 @@ import numpy.random
|
|||
import numpy
|
||||
import warnings
|
||||
|
||||
from ..gold import Example
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..gold cimport GoldParse
|
||||
from ..typedefs cimport weight_t, class_t, hash_t
|
||||
from ._parser_model cimport alloc_activations, free_activations
|
||||
from ._parser_model cimport predict_states, arg_max_if_valid
|
||||
from ._parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
|
||||
from ._parser_model cimport get_c_weights, get_c_sizes
|
||||
from ._parser_model import ParserModel
|
||||
from ..util import link_vectors_to_models, create_default_optimizer, registry
|
||||
from ..compat import copy_array
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..gold cimport GoldParse
|
||||
from ..errors import Errors, Warnings
|
||||
from .. import util
|
||||
from .stateclass cimport StateClass
|
||||
from ._state cimport StateC
|
||||
from .transition_system cimport Transition
|
||||
from . cimport _beam_utils
|
||||
|
||||
from ..gold import Example
|
||||
from ..util import link_vectors_to_models, create_default_optimizer, registry
|
||||
from ..compat import copy_array
|
||||
from ..errors import Errors, Warnings
|
||||
from .. import util
|
||||
from ._parser_model import ParserModel
|
||||
from . import _beam_utils
|
||||
from . import nonproj
|
||||
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
# cython: profile=True
|
||||
# cython: infer_types=True
|
||||
# cython: profile=True, infer_types=True
|
||||
"""Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005
|
||||
for doing pseudo-projective parsing implementation uses the HEAD decoration
|
||||
scheme.
|
||||
"""
|
||||
from copy import copy
|
||||
|
||||
from ..gold import Example
|
||||
from ..tokens.doc cimport Doc, set_children_from_heads
|
||||
|
||||
from ..gold import Example
|
||||
from ..errors import Errors
|
||||
|
||||
|
||||
|
|
|
@ -5,7 +5,6 @@ from ..structs cimport TokenC
|
|||
from ..gold cimport GoldParse
|
||||
from ..gold cimport GoldParseC
|
||||
from ..strings cimport StringStore
|
||||
|
||||
from .stateclass cimport StateClass
|
||||
from ._state cimport StateC
|
||||
|
||||
|
|
|
@ -1,16 +1,18 @@
|
|||
# cython: infer_types=True
|
||||
from cpython.ref cimport Py_INCREF
|
||||
from cymem.cymem cimport Pool
|
||||
from ..typedefs cimport weight_t
|
||||
from thinc.extra.search cimport Beam
|
||||
|
||||
from collections import Counter
|
||||
import srsly
|
||||
|
||||
from ..typedefs cimport weight_t
|
||||
from . cimport _beam_utils
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..structs cimport TokenC
|
||||
from .stateclass cimport StateClass
|
||||
from ..typedefs cimport attr_t
|
||||
|
||||
from ..errors import Errors
|
||||
from .. import util
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
from libcpp.vector cimport vector
|
||||
|
||||
from preshed.maps cimport PreshMap
|
||||
from cymem.cymem cimport Pool
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
# cython: embedsignature=True
|
||||
# cython: profile=True
|
||||
# cython: embedsignature=True, profile=True
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from cython.operator cimport dereference as deref
|
||||
|
@ -15,13 +14,13 @@ import warnings
|
|||
|
||||
from .tokens.doc cimport Doc
|
||||
from .strings cimport hash_string
|
||||
from .lexeme cimport EMPTY_LEXEME
|
||||
|
||||
from .attrs import intify_attrs
|
||||
from .symbols import ORTH
|
||||
|
||||
from .errors import Errors, Warnings
|
||||
from . import util
|
||||
from .attrs import intify_attrs
|
||||
from .lexeme cimport EMPTY_LEXEME
|
||||
from .symbols import ORTH
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
# cython: infer_types=True
|
||||
# cython: bounds_check=False
|
||||
# cython: profile=True
|
||||
# cython: infer_types=True, bounds_check=False, profile=True
|
||||
from libc.string cimport memcpy, memset
|
||||
from libc.stdlib cimport malloc, free
|
||||
from cymem.cymem cimport Pool
|
||||
|
|
|
@ -1,12 +1,10 @@
|
|||
# cython: infer_types=True
|
||||
# cython: bounds_check=False
|
||||
# cython: profile=True
|
||||
# cython: infer_types=True, bounds_check=False, profile=True
|
||||
cimport cython
|
||||
cimport numpy as np
|
||||
from libc.string cimport memcpy, memset
|
||||
from libc.math cimport sqrt
|
||||
from collections import Counter
|
||||
|
||||
from collections import Counter
|
||||
import numpy
|
||||
import numpy.linalg
|
||||
import struct
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
cimport numpy as np
|
||||
from libc.math cimport sqrt
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ from ..typedefs cimport attr_t, flags_t
|
|||
from ..parts_of_speech cimport univ_pos_t
|
||||
from .doc cimport Doc
|
||||
from ..lexeme cimport Lexeme
|
||||
|
||||
from ..errors import Errors
|
||||
|
||||
|
||||
|
|
|
@ -18,12 +18,12 @@ from ..attrs cimport IS_OOV, IS_TITLE, IS_UPPER, IS_CURRENCY, LIKE_URL, LIKE_NUM
|
|||
from ..attrs cimport IS_STOP, ID, ORTH, NORM, LOWER, SHAPE, PREFIX, SUFFIX
|
||||
from ..attrs cimport LENGTH, CLUSTER, LEMMA, POS, TAG, DEP
|
||||
from ..symbols cimport conj
|
||||
from .morphanalysis cimport MorphAnalysis
|
||||
|
||||
from .. import parts_of_speech
|
||||
from .. import util
|
||||
from ..errors import Errors, Warnings
|
||||
from .underscore import Underscore, get_ext_args
|
||||
from .morphanalysis cimport MorphAnalysis
|
||||
|
||||
|
||||
cdef class Token:
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
from libcpp.vector cimport vector
|
||||
|
||||
from preshed.maps cimport PreshMap
|
||||
from cymem.cymem cimport Pool
|
||||
from murmurhash.mrmr cimport hash64
|
||||
|
|
Loading…
Reference in New Issue
Block a user