diff --git a/spacy/pipeline/_parser_internals/_beam_utils.pyx b/spacy/pipeline/_parser_internals/_beam_utils.pyx index 9b91459bd..fff8d63e9 100644 --- a/spacy/pipeline/_parser_internals/_beam_utils.pyx +++ b/spacy/pipeline/_parser_internals/_beam_utils.pyx @@ -2,12 +2,6 @@ # cython: profile=True import numpy -from thinc.extra.search cimport Beam - -from thinc.extra.search import MaxViolation - -from thinc.extra.search cimport MaxViolation - from ...typedefs cimport class_t from .transition_system cimport Transition, TransitionSystem diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index cd93422ec..d6ebfc98c 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -17,7 +17,7 @@ from ..parts_of_speech import IDS as POS_IDS from ..scorer import Scorer from ..training import validate_examples, validate_get_examples from ..util import registry -from .tagger import Tagger +from .tagger import ActivationsT, Tagger # See #9050 BACKWARD_OVERWRITE = True diff --git a/spacy/pipeline/sentencizer.pyx b/spacy/pipeline/sentencizer.pyx index f7b47ec3e..28cf5d6b4 100644 --- a/spacy/pipeline/sentencizer.pyx +++ b/spacy/pipeline/sentencizer.pyx @@ -11,7 +11,6 @@ from .pipe import Pipe from .senter import senter_score - @Language.factory( "sentencizer", assigns=["token.is_sent_start", "doc.sents"], diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx index 8241a75ba..95cd21f9b 100644 --- a/spacy/pipeline/transition_parser.pyx +++ b/spacy/pipeline/transition_parser.pyx @@ -6,28 +6,32 @@ from typing import Dict, Iterable, List, Optional, Tuple cimport numpy as np from cymem.cymem cimport Pool -from itertools import islice - import contextlib import random +from itertools import islice import numpy import numpy.random import srsly - -from thinc.api import CupyOps, NumpyOps, set_dropout_rate +from thinc.api import ( + CupyOps, + NumpyOps, + Optimizer, + get_array_module, + get_ops, + 
set_dropout_rate, +) from thinc.types import Floats2d, Ints1d from ..ml.tb_framework import TransitionModelInputs from ..tokens.doc cimport Doc -from ._parser_internals cimport _beam_utils -from ._parser_internals.stateclass cimport StateC, StateClass -from .trainable_pipe cimport TrainablePipe - from ..typedefs cimport weight_t from ..vocab cimport Vocab +from ._parser_internals cimport _beam_utils +from ._parser_internals.stateclass cimport StateC, StateClass from ._parser_internals.transition_system cimport Transition, TransitionSystem +from .trainable_pipe cimport TrainablePipe from .. import util from ..errors import Errors @@ -38,6 +42,11 @@ from ..training import ( ) from ._parser_internals import _beam_utils + +# TODO: Remove when we switch to Cython 3. +cdef extern from "<algorithm>" namespace "std" nogil: + bint equal[InputIt1, InputIt2](InputIt1 first1, InputIt1 last1, InputIt2 first2) except + + NUMPY_OPS = NumpyOps() diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py index b6e865325..f6cefbc1f 100644 --- a/spacy/tests/pipeline/test_tok2vec.py +++ b/spacy/tests/pipeline/test_tok2vec.py @@ -695,4 +695,4 @@ def test_tok2vec_listener_source_replace_listeners(): nlp2.add_pipe("tagger", source=nlp1) assert nlp2.get_pipe("tok2vec").listening_components == [] nlp2.add_pipe("ner", name="ner2", source=nlp1) - assert nlp2.get_pipe("tok2vec").listening_components == ["ner2"] \ No newline at end of file + assert nlp2.get_pipe("tok2vec").listening_components == ["ner2"] diff --git a/spacy/tests/serialize/test_serialize_config.py b/spacy/tests/serialize/test_serialize_config.py index 646ce0f5d..b351ea801 100644 --- a/spacy/tests/serialize/test_serialize_config.py +++ b/spacy/tests/serialize/test_serialize_config.py @@ -18,6 +18,7 @@ from spacy.ml.models import ( build_Tok2Vec_model, ) from spacy.schemas import ConfigSchema, ConfigSchemaDistill, ConfigSchemaPretrain +from spacy.training import Example from spacy.util import ( load_config, 
load_config_from_str, diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index f0ab486a0..683be9d0a 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -13,7 +13,7 @@ from ..lexeme cimport Lexeme from ..structs cimport TokenC from ..symbols cimport dep from ..typedefs cimport attr_t -from .doc cimport _get_lca_matrix, get_token_attr +from .doc cimport _get_lca_matrix, get_token_attr, token_by_end, token_by_start from .token cimport Token from ..errors import Errors, Warnings diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index 14a40c96d..ff1120b7b 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -26,7 +26,6 @@ from ..attrs cimport ( LIKE_EMAIL, LIKE_NUM, LIKE_URL, - ORTH, ) from ..lexeme cimport Lexeme from ..symbols cimport conj @@ -426,7 +425,7 @@ cdef class Token: if "vector" in self.doc.user_token_hooks: return self.doc.user_token_hooks["vector"](self) else: - return self.vocab.get_vector(Token.get_struct_attr(self.c, self.vocab.vectors.attr)) + return self.vocab.get_vector(self.c.lex.orth) @property def vector_norm(self): diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 050b9743c..783e6d00a 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -65,7 +65,7 @@ cdef class Vectors: cdef readonly unicode eow cdef readonly attr_id_t attr - def __init__(self, *, strings=None, shape=None, data=None, keys=None, mode=Mode.default, minn=0, maxn=0, hash_count=1, hash_seed=0, bow="<", eow=">"): + def __init__(self, *, strings=None, shape=None, data=None, keys=None, mode=Mode.default, minn=0, maxn=0, hash_count=1, hash_seed=0, bow="<", eow=">", attr="ORTH"): """Create a new vector store. strings (StringStore): The string store.