diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py index 762854f76..873d85835 100644 --- a/spacy/tokens/_serialize.py +++ b/spacy/tokens/_serialize.py @@ -13,12 +13,9 @@ from ..errors import Errors from ..util import SimpleFrozenList, ensure_path from ..vocab import Vocab from ._dict_proxies import SpanGroups +from .doc import DOCBIN_ALL_ATTRS as ALL_ATTRS from .doc import Doc -# fmt: off -ALL_ATTRS = ("ORTH", "NORM", "TAG", "HEAD", "DEP", "ENT_IOB", "ENT_TYPE", "ENT_KB_ID", "ENT_ID", "LEMMA", "MORPH", "POS", "SENT_START") -# fmt: on - class DocBin: """Pack Doc objects for binary serialization. diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index c5a2721a3..206253949 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -52,12 +52,18 @@ from ..errors import Errors, Warnings from ..morphology import Morphology from ..util import get_words_and_spaces from ._retokenize import Retokenizer -from ._serialize import ALL_ATTRS as DOCBIN_ALL_ATTRS from .underscore import Underscore, get_ext_args DEF PADDING = 5 +# DOCBIN_ALL_ATTRS is defined here rather than in _serialize, which imports +# Doc from this module, so importing it from there would be an import cycle. 
+ +# fmt: off +DOCBIN_ALL_ATTRS = ("ORTH", "NORM", "TAG", "HEAD", "DEP", "ENT_IOB", "ENT_TYPE", "ENT_KB_ID", "ENT_ID", "LEMMA", "MORPH", "POS", "SENT_START") +# fmt: on + cdef int bounds_check(int i, int length, int padding) except -1: if (i + padding) < 0: raise IndexError(Errors.E026.format(i=i, length=length)) diff --git a/spacy/training/callbacks.py b/spacy/training/callbacks.py index 053227a11..21c3d56a1 100644 --- a/spacy/training/callbacks.py +++ b/spacy/training/callbacks.py @@ -1,15 +1,17 @@ -from typing import Callable, Optional +from typing import TYPE_CHECKING, Callable, Optional from ..errors import Errors -from ..language import Language from ..util import load_model, logger, registry +if TYPE_CHECKING: + from ..language import Language + @registry.callbacks("spacy.copy_from_base_model.v1") def create_copy_from_base_model( tokenizer: Optional[str] = None, vocab: Optional[str] = None, -) -> Callable[[Language], Language]: +) -> Callable[["Language"], "Language"]: def copy_from_base_model(nlp): if tokenizer: logger.info("Copying tokenizer from: %s", tokenizer)