Renaming gold & annotation_setter (#6042)

* version bump to 3.0.0a16

* rename "gold" folder to "training"

* rename 'annotation_setter' to 'set_extra_annotations'

* formatting
Author: Sofie Van Landeghem, 2020-09-09 10:31:03 +02:00, committed by GitHub
parent 60f22e1800
commit 8e7557656f
86 changed files with 122 additions and 124 deletions
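
For downstream code, the practical change is mechanical: every `spacy.gold` import becomes `spacy.training`, and the `Transformer` component's `annotation_setter` argument becomes `set_extra_annotations`. A minimal before/after sketch (names taken from the diffs below):

```python
# spacy-nightly 3.0.0a15 and earlier:
#   from spacy.gold import Example, Corpus, docs_to_json
#   from spacy.gold.converters import json2docs

# spacy-nightly 3.0.0a16 and later:
from spacy.training import Example, Corpus, docs_to_json
from spacy.training.converters import json2docs

# The Transformer component's callback is renamed the same way:
#   Transformer(..., annotation_setter=...)      # old
#   Transformer(..., set_extra_annotations=...)  # new
```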

@@ -1,7 +1,7 @@
 from pathlib import Path
 import plac
 import spacy
-from spacy.gold import docs_to_json
+from spacy.training import docs_to_json
 import srsly
 import sys

@@ -23,7 +23,7 @@ Options.docstrings = True
 PACKAGES = find_packages()
 MOD_NAMES = [
-    "spacy.gold.example",
+    "spacy.training.example",
     "spacy.parts_of_speech",
     "spacy.strings",
     "spacy.lexeme",
@@ -48,7 +48,7 @@ MOD_NAMES = [
     "spacy.pipeline._parser_internals.stateclass",
     "spacy.pipeline._parser_internals.transition_system",
     "spacy.tokenizer",
-    "spacy.gold.gold_io",
+    "spacy.training.gold_io",
     "spacy.tokens.doc",
     "spacy.tokens.span",
     "spacy.tokens.token",

@@ -1,6 +1,6 @@
 # fmt: off
 __title__ = "spacy-nightly"
-__version__ = "3.0.0a15"
+__version__ = "3.0.0a16"
 __release__ = True
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"

@@ -7,9 +7,9 @@ import re
 import sys
 from ._util import app, Arg, Opt
-from ..gold import docs_to_json
+from ..training import docs_to_json
 from ..tokens import DocBin
-from ..gold.converters import iob2docs, conll_ner2docs, json2docs, conllu2docs
+from ..training.converters import iob2docs, conll_ner2docs, json2docs, conllu2docs
 
 # Converters are matched by file extension except for ner/iob, which are

@@ -8,7 +8,7 @@ import typer
 from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
 from ._util import import_code, debug_cli, get_sourced_components
-from ..gold import Corpus, Example
+from ..training import Corpus, Example
 from ..pipeline._parser_internals import nonproj
 from ..language import Language
 from .. import util

@@ -5,7 +5,7 @@ import re
 import srsly
 from thinc.api import require_gpu, fix_random_seed
-from ..gold import Corpus
+from ..training import Corpus
 from ..tokens import Doc
 from ._util import app, Arg, Opt
 from ..scorer import Scorer

@@ -16,7 +16,7 @@ from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
 from ._util import import_code, get_sourced_components
 from ..language import Language
 from .. import util
-from ..gold.example import Example
+from ..training.example import Example
 from ..errors import Errors

@@ -66,7 +66,7 @@ class Warnings:
             "in problems with the vocab further on in the pipeline.")
     W030 = ("Some entities could not be aligned in the text \"{text}\" with "
             "entities \"{entities}\". Use "
-            "`spacy.gold.biluo_tags_from_offsets(nlp.make_doc(text), entities)`"
+            "`spacy.training.biluo_tags_from_offsets(nlp.make_doc(text), entities)`"
             " to check the alignment. Misaligned entities ('-') will be "
             "ignored during training.")
     W033 = ("Training a new {model} using a model with no lexeme normalization "

@@ -17,7 +17,7 @@ from timeit import default_timer as timer
 from .tokens.underscore import Underscore
 from .vocab import Vocab, create_vocab
 from .pipe_analysis import validate_attrs, analyze_pipes, print_pipe_analysis
-from .gold import Example, validate_examples
+from .training import Example, validate_examples
 from .scorer import Scorer
 from .util import create_default_optimizer, registry, SimpleFrozenList
 from .util import SimpleFrozenDict, combine_score_weights, CONFIG_SECTION_ORDER

@@ -8,7 +8,7 @@ from ...typedefs cimport hash_t, attr_t
 from ...strings cimport hash_string
 from ...structs cimport TokenC
 from ...tokens.doc cimport Doc, set_children_from_heads
-from ...gold.example cimport Example
+from ...training.example cimport Example
 from ...errors import Errors
 from .stateclass cimport StateClass
 from ._state cimport StateC

@@ -5,7 +5,7 @@ from cymem.cymem cimport Pool
 from ...typedefs cimport weight_t, attr_t
 from ...lexeme cimport Lexeme
 from ...attrs cimport IS_SPACE
-from ...gold.example cimport Example
+from ...training.example cimport Example
 from ...errors import Errors
 from .stateclass cimport StateClass
 from ._state cimport StateC

@@ -3,7 +3,7 @@ from cymem.cymem cimport Pool
 from ...typedefs cimport attr_t, weight_t
 from ...structs cimport TokenC
 from ...strings cimport StringStore
-from ...gold.example cimport Example
+from ...training.example cimport Example
 from .stateclass cimport StateClass
 from ._state cimport StateC

@@ -4,7 +4,7 @@ from pathlib import Path
 from .pipe import Pipe
 from ..errors import Errors
-from ..gold import validate_examples
+from ..training import validate_examples
 from ..language import Language
 from ..matcher import Matcher
 from ..scorer import Scorer

@@ -9,7 +9,7 @@ from .functions import merge_subtokens
 from ..language import Language
 from ._parser_internals import nonproj
 from ..scorer import Scorer
-from ..gold import validate_examples
+from ..training import validate_examples
 
 default_model_config = """

@@ -12,7 +12,7 @@ from ..tokens import Doc
 from .pipe import Pipe, deserialize_config
 from ..language import Language
 from ..vocab import Vocab
-from ..gold import Example, validate_examples
+from ..training import Example, validate_examples
 from ..errors import Errors, Warnings
 from ..util import SimpleFrozenList
 from .. import util

@@ -9,7 +9,7 @@ from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList
 from ..tokens import Doc, Span
 from ..matcher import Matcher, PhraseMatcher
 from ..scorer import Scorer
-from ..gold import validate_examples
+from ..training import validate_examples
 
 DEFAULT_ENT_ID_SEP = "||"

@@ -8,7 +8,7 @@ from ..lookups import Lookups, load_lookups
 from ..scorer import Scorer
 from ..tokens import Doc, Token
 from ..vocab import Vocab
-from ..gold import validate_examples
+from ..training import validate_examples
 from .. import util

@@ -16,7 +16,7 @@ from .pipe import deserialize_config
 from .tagger import Tagger
 from .. import util
 from ..scorer import Scorer
-from ..gold import validate_examples
+from ..training import validate_examples
 
 default_model_config = """

@@ -8,7 +8,7 @@ from ..tokens.doc cimport Doc
 from .pipe import Pipe
 from .tagger import Tagger
-from ..gold import validate_examples
+from ..training import validate_examples
 from ..language import Language
 from ._parser_internals import nonproj
 from ..attrs import POS, ID

@@ -7,7 +7,7 @@ from ._parser_internals.ner cimport BiluoPushDown
 from ..language import Language
 from ..scorer import Scorer
-from ..gold import validate_examples
+from ..training import validate_examples
 
 default_model_config = """

@@ -4,7 +4,7 @@ from thinc.api import set_dropout_rate, Model
 from ..tokens.doc cimport Doc
-from ..gold import validate_examples
+from ..training import validate_examples
 from ..errors import Errors
 from .. import util

@@ -7,7 +7,7 @@ from ..tokens.doc cimport Doc
 from .pipe import Pipe
 from ..language import Language
 from ..scorer import Scorer
-from ..gold import validate_examples
+from ..training import validate_examples
 from .. import util

@@ -11,7 +11,7 @@ from .tagger import Tagger
 from ..language import Language
 from ..errors import Errors
 from ..scorer import Scorer
-from ..gold import validate_examples
+from ..training import validate_examples
 from .. import util

@@ -6,8 +6,8 @@ from thinc.util import to_numpy
 from itertools import islice
 from ..errors import Errors
-from ..gold import Example, spans_from_biluo_tags, iob_to_biluo, biluo_to_iob
-from ..gold import validate_examples
+from ..training import Example, spans_from_biluo_tags, iob_to_biluo, biluo_to_iob
+from ..training import validate_examples
 from ..tokens import Doc
 from ..language import Language
 from ..vocab import Vocab

@@ -17,7 +17,7 @@ from ..attrs import POS, ID
 from ..parts_of_speech import X
 from ..errors import Errors, TempErrors, Warnings
 from ..scorer import Scorer
-from ..gold import validate_examples
+from ..training import validate_examples
 from .. import util

@@ -6,7 +6,7 @@ import numpy
 from .pipe import Pipe
 from ..language import Language
-from ..gold import Example, validate_examples
+from ..training import Example, validate_examples
 from ..errors import Errors
 from ..scorer import Scorer
 from .. import util

@@ -3,7 +3,7 @@ from thinc.api import Model, set_dropout_rate, Optimizer, Config
 from itertools import islice
 from .pipe import Pipe
-from ..gold import Example, validate_examples
+from ..training import Example, validate_examples
 from ..tokens import Doc
 from ..vocab import Vocab
 from ..language import Language

@@ -21,7 +21,7 @@ from ..ml.parser_model cimport WeightsC, ActivationsC, SizesC, cpu_log_loss
 from ..ml.parser_model cimport get_c_weights, get_c_sizes
 from ..tokens.doc cimport Doc
-from ..gold import validate_examples
+from ..training import validate_examples
 from ..errors import Errors, Warnings
 from .. import util

@@ -12,7 +12,7 @@ from .attrs import NAMES
 if TYPE_CHECKING:
     # This lets us add type hints for mypy etc. without causing circular imports
     from .language import Language  # noqa: F401
-    from .gold import Example  # noqa: F401
+    from .training import Example  # noqa: F401
 
 ItemT = TypeVar("ItemT")

@@ -1,7 +1,7 @@
 from typing import Optional, Iterable, Dict, Any, Callable, TYPE_CHECKING
 import numpy as np
-from .gold import Example
+from .training import Example
 from .tokens import Token, Doc, Span
 from .errors import Errors
 from .util import get_lang_class, SimpleFrozenList

@@ -1,4 +1,4 @@
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.pipeline import EntityRecognizer
 from spacy.tokens import Span, Doc
 from spacy import registry

@@ -3,7 +3,7 @@ from thinc.api import Adam, fix_random_seed
 from spacy import registry
 from spacy.attrs import NORM
 from spacy.vocab import Vocab
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.tokens import Doc
 from spacy.pipeline import DependencyParser, EntityRecognizer
 from spacy.pipeline.ner import DEFAULT_NER_MODEL

@@ -1,7 +1,7 @@
 import pytest
 from spacy.vocab import Vocab
 from spacy import registry
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.pipeline import DependencyParser
 from spacy.tokens import Doc
 from spacy.pipeline._parser_internals.nonproj import projectivize

@@ -4,7 +4,7 @@ from spacy.lang.en import English
 from spacy.language import Language
 from spacy.lookups import Lookups
 from spacy.pipeline._parser_internals.ner import BiluoPushDown
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.tokens import Doc
 from spacy.vocab import Vocab
 import logging

@@ -1,7 +1,7 @@
 import pytest
 from spacy import registry
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.vocab import Vocab
 from spacy.pipeline._parser_internals.arc_eager import ArcEager
 from spacy.pipeline.transition_parser import Parser

@@ -3,7 +3,7 @@ import pytest
 from spacy.lang.en import English
 from ..util import get_doc, apply_transition_sequence, make_tempdir
 from ... import util
-from ...gold import Example
+from ...training import Example
 
 TRAIN_DATA = [
     (

@@ -3,7 +3,7 @@ from thinc.api import Adam
 from spacy.attrs import NORM
 from spacy.vocab import Vocab
 from spacy import registry
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.pipeline.dep_parser import DEFAULT_PARSER_MODEL
 from spacy.tokens import Doc
 from spacy.pipeline import DependencyParser

@@ -1,6 +1,6 @@
 import pytest
 import numpy
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.lang.en import English
 from spacy.pipeline import AttributeRuler
 from spacy import util, registry

@@ -4,7 +4,7 @@ import pytest
 from spacy.kb import KnowledgeBase, get_candidates, Candidate
 from spacy import util, registry
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.lang.en import English
 from spacy.tests.util import make_tempdir
 from spacy.tokens import Span

@@ -1,7 +1,7 @@
 import pytest
 from spacy import util
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.lang.en import English
 from spacy.language import Language
 from spacy.tests.util import make_tempdir

@@ -1,7 +1,7 @@
 import pytest
 from spacy import util
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.lang.en import English
 from spacy.language import Language
 from spacy.tests.util import make_tempdir

@@ -1,6 +1,6 @@
 import pytest
 from spacy.lang.en import English
-from spacy.gold import Example
+from spacy.training import Example
 from spacy import util
 from ..util import make_tempdir

@@ -1,6 +1,6 @@
 import pytest
 from spacy import util
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.lang.en import English
 from spacy.language import Language

@@ -10,7 +10,7 @@ from spacy.tokens import Doc
 from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
 from ..util import make_tempdir
-from ...gold import Example
+from ...training import Example
 
 TRAIN_DATA = [

@@ -1,7 +1,7 @@
 import pytest
 import random
 from spacy import util
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.matcher import Matcher
 from spacy.attrs import IS_PUNCT, ORTH, LOWER
 from spacy.vocab import Vocab

@@ -3,7 +3,7 @@ import gc
 import numpy
 import copy
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.lang.en import English
 from spacy.lang.en.stop_words import STOP_WORDS
 from spacy.lang.lex_attrs import is_stop

@@ -3,7 +3,7 @@ import numpy
 from spacy.tokens import Doc
 from spacy.matcher import Matcher
 from spacy.displacy import render
-from spacy.gold import iob_to_biluo
+from spacy.training import iob_to_biluo
 from spacy.lang.it import Italian
 from spacy.lang.en import English

@@ -1,6 +1,6 @@
 import pytest
 from spacy import displacy
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.lang.en import English
 from spacy.lang.ja import Japanese
 from spacy.lang.xx import MultiLanguage

@@ -9,7 +9,7 @@ from spacy.tokens import Doc, Token
 from spacy.matcher import Matcher, PhraseMatcher
 from spacy.errors import MatchPatternError
 from spacy.util import minibatch
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.lang.hi import Hindi
 from spacy.lang.es import Spanish
 from spacy.lang.en import English

@@ -2,8 +2,8 @@ import pytest
 from spacy.pipeline import Pipe
 from spacy.matcher import PhraseMatcher, Matcher
 from spacy.tokens import Doc, Span, DocBin
-from spacy.gold import Example, Corpus
-from spacy.gold.converters import json2docs
+from spacy.training import Example, Corpus
+from spacy.training.converters import json2docs
 from spacy.vocab import Vocab
 from spacy.lang.en import English
 from spacy.util import minibatch, ensure_path, load_model

@@ -1,9 +1,7 @@
 import pytest
-from mock import Mock
-from spacy.matcher import DependencyMatcher
 from spacy.tokens import Doc, Span, DocBin
-from spacy.gold import Example
-from spacy.gold.converters.conllu2docs import conllu2docs
+from spacy.training import Example
+from spacy.training.converters.conllu2docs import conllu2docs
 from spacy.lang.en import English
 from spacy.kb import KnowledgeBase
 from spacy.vocab import Vocab
@@ -12,7 +10,7 @@ from spacy.util import ensure_path, load_model_from_path
 import numpy
 import pickle
-from ..util import get_doc, make_tempdir
+from ..util import make_tempdir
 
 def test_issue4528(en_vocab):

@@ -1,8 +1,8 @@
 import pytest
 from click import NoSuchOption
-from spacy.gold import docs_to_json, biluo_tags_from_offsets
-from spacy.gold.converters import iob2docs, conll_ner2docs, conllu2docs
+from spacy.training import docs_to_json, biluo_tags_from_offsets
+from spacy.training.converters import iob2docs, conll_ner2docs, conllu2docs
 from spacy.lang.en import English
 from spacy.schemas import ProjectConfigSchema, RecommendationSchema, validate
 from spacy.cli.pretrain import make_docs

@@ -3,7 +3,7 @@ import pytest
 from spacy.language import Language
 from spacy.tokens import Doc, Span
 from spacy.vocab import Vocab
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.lang.en import English
 from spacy.util import registry

@@ -1,5 +1,5 @@
 import pytest
-from spacy.gold.example import Example
+from spacy.training.example import Example
 from spacy.tokens import Doc
 from spacy.vocab import Vocab

@@ -1,8 +1,8 @@
 from numpy.testing import assert_almost_equal, assert_array_almost_equal
 import pytest
 from pytest import approx
-from spacy.gold import Example
-from spacy.gold.iob_utils import biluo_tags_from_offsets
+from spacy.training import Example
+from spacy.training.iob_utils import biluo_tags_from_offsets
 from spacy.scorer import Scorer, ROCAUCScore
 from spacy.scorer import _roc_auc_score, _roc_curve
 from .util import get_doc

@@ -6,7 +6,7 @@ from spacy.ml.models.tok2vec import MishWindowEncoder, MaxoutWindowEncoder
 from spacy.pipeline.tok2vec import Tok2Vec, Tok2VecListener
 from spacy.vocab import Vocab
 from spacy.tokens import Doc
-from spacy.gold import Example
+from spacy.training import Example
 from spacy import util
 from spacy.lang.en import English
 from .util import get_batch

@@ -1,9 +1,10 @@
 import numpy
-from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags, Alignment
-from spacy.gold import spans_from_biluo_tags, iob_to_biluo
-from spacy.gold import Corpus, docs_to_json
-from spacy.gold.example import Example
-from spacy.gold.converters import json2docs
+from spacy.training import biluo_tags_from_offsets, offsets_from_biluo_tags, Alignment
+from spacy.training import spans_from_biluo_tags, iob_to_biluo
+from spacy.training import Corpus, docs_to_json
+from spacy.training.example import Example
+from spacy.training.converters import json2docs
+from spacy.training.augment import make_orth_variants_example
 from spacy.lang.en import English
 from spacy.tokens import Doc, DocBin
 from spacy.util import get_words_and_spaces, minibatch
@@ -12,7 +13,6 @@ import pytest
 import srsly
 from .util import make_tempdir
-from ..gold.augment import make_orth_variants_example
 
 @pytest.fixture

@@ -5,7 +5,7 @@ from .util import get_random_doc
 from spacy import util
 from spacy.util import dot_to_object, SimpleFrozenList
 from thinc.api import Config, Optimizer
-from spacy.gold.batchers import minibatch_by_words
+from spacy.training.batchers import minibatch_by_words
 from ..lang.en import English
 from ..lang.nl import Dutch
 from ..language import DEFAULT_CONFIG_PATH

@@ -24,7 +24,7 @@ from .util import registry
 from .attrs import intify_attrs
 from .symbols import ORTH
 from .scorer import Scorer
-from .gold import validate_examples
+from .training import validate_examples
 
 cdef class Tokenizer:

@@ -1,7 +1,7 @@
 from wasabi import Printer
 from .. import tags_to_entities
-from ...gold import iob_to_biluo
+from ...training import iob_to_biluo
 from ...lang.xx import MultiLanguage
 from ...tokens import Doc, Span
 from ...util import load_model

@@ -1,7 +1,7 @@
 import re
 from .conll_ner2docs import n_sents_info
-from ...gold import iob_to_biluo, spans_from_biluo_tags
+from ...training import iob_to_biluo, spans_from_biluo_tags
 from ...tokens import Doc, Token, Span
 from ...vocab import Vocab
 from wasabi import Printer

@@ -1,7 +1,7 @@
 from wasabi import Printer
 from .conll_ner2docs import n_sents_info
-from ...gold import iob_to_biluo, tags_to_entities
+from ...training import iob_to_biluo, tags_to_entities
 from ...tokens import Doc, Span
 from ...util import minibatch

@@ -290,10 +290,10 @@ always be the **last element** in the row.
 > ```
 
 | Name | Description |
-| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| ----------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `vocab` | The shared vocabulary. ~~Vocab~~ |
 | `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. ~~Model[List[Doc], FullTransformerBatch]~~ |
-| `annotation_setter` | Function that takes a batch of `Doc` objects and transformer outputs can set additional annotations on the `Doc`. ~~Callable[[List[Doc], FullTransformerBatch], None]~~ |
+| `set_extra_annotations` | Function that takes a batch of `Doc` objects and transformer outputs and can set additional annotations on the `Doc`. ~~Callable[[List[Doc], FullTransformerBatch], None]~~ |
 
 ### List {#list}

@@ -357,12 +357,12 @@ File /path/to/spacy/ml/models/tok2vec.py (line 207)
 [training.logger]
 Registry @loggers
 Name spacy.ConsoleLogger.v1
-Module spacy.gold.loggers
+Module spacy.training.loggers
 File /path/to/spacy/gold/loggers.py (line 8)
 [training.batcher]
 Registry @batchers
 Name spacy.batch_by_words.v1
-Module spacy.gold.batchers
+Module spacy.training.batchers
 File /path/to/spacy/gold/batchers.py (line 49)
 [training.batcher.size]
 Registry @schedules
@@ -372,7 +372,7 @@ File /Users/ines/Repos/explosion/thinc/thinc/schedules.py (line 43)
 [training.dev_corpus]
 Registry @readers
 Name spacy.Corpus.v1
-Module spacy.gold.corpus
+Module spacy.training.corpus
 File /path/to/spacy/gold/corpus.py (line 18)
 [training.optimizer]
 Registry @optimizers
@@ -387,7 +387,7 @@ File /Users/ines/Repos/explosion/thinc/thinc/schedules.py (line 91)
 [training.train_corpus]
 Registry @readers
 Name spacy.Corpus.v1
-Module spacy.gold.corpus
+Module spacy.training.corpus
 File /path/to/spacy/gold/corpus.py (line 18)
 ```

@@ -58,7 +58,7 @@ train/test skew.
 > #### Example
 >
 > ```python
-> from spacy.gold import Corpus
+> from spacy.training import Corpus
 >
 > # With a single file
 > corpus = Corpus("./data/train.spacy")
@@ -82,7 +82,7 @@ Yield examples from the data.
 > #### Example
 >
 > ```python
-> from spacy.gold import Corpus
+> from spacy.training import Corpus
 > import spacy
 >
 > corpus = Corpus("./train.spacy")

@@ -175,7 +175,7 @@ run [`spacy pretrain`](/api/cli#pretrain).
 >
 > ```python
 > from spacy.tokens import DocBin
-> from spacy.gold import Corpus
+> from spacy.training import Corpus
 >
 > doc_bin = DocBin(docs=docs)
 > doc_bin.to_disk("./data.spacy")

@@ -22,7 +22,7 @@ both documents.
 >
 > ```python
 > from spacy.tokens import Doc
-> from spacy.gold import Example
+> from spacy.training import Example
 >
 > words = ["hello", "world", "!"]
 > spaces = [True, False, False]
@@ -48,7 +48,7 @@ see the [training format documentation](/api/data-formats#dict-input).
 >
 > ```python
 > from spacy.tokens import Doc
-> from spacy.gold import Example
+> from spacy.training import Example
 >
 > predicted = Doc(vocab, words=["Apply", "some", "sunscreen"])
 > token_ref = ["Apply", "some", "sun", "screen"]
@@ -301,7 +301,7 @@ tokenizations add up to the same string. For example, you'll be able to align
 > #### Example
 >
 > ```python
-> from spacy.gold import Alignment
+> from spacy.training import Alignment
 >
 > bert_tokens = ["obama", "'", "s", "podcast"]
 > spacy_tokens = ["obama", "'s", "podcast"]

@@ -538,7 +538,7 @@ sequences in the batch.
 ## Training data and alignment {#gold source="spacy/gold"}
 
-### gold.biluo_tags_from_offsets {#biluo_tags_from_offsets tag="function"}
+### training.biluo_tags_from_offsets {#biluo_tags_from_offsets tag="function"}
 
 Encode labelled spans into per-token tags, using the
 [BILUO scheme](/usage/linguistic-features#accessing-ner) (Begin, In, Last, Unit,
@@ -554,7 +554,7 @@ single-token entity.
 > #### Example
 >
 > ```python
-> from spacy.gold import biluo_tags_from_offsets
+> from spacy.training import biluo_tags_from_offsets
 >
 > doc = nlp("I like London.")
 > entities = [(7, 13, "LOC")]
@@ -568,7 +568,7 @@ single-token entity.
 | `entities` | A sequence of `(start, end, label)` triples. `start` and `end` should be character-offset integers denoting the slice into the original string. ~~List[Tuple[int, int, Union[str, int]]]~~ |
 | **RETURNS** | A list of strings, describing the [BILUO](/usage/linguistic-features#accessing-ner) tags. ~~List[str]~~ |
 
-### gold.offsets_from_biluo_tags {#offsets_from_biluo_tags tag="function"}
+### training.offsets_from_biluo_tags {#offsets_from_biluo_tags tag="function"}
 
 Encode per-token tags following the
 [BILUO scheme](/usage/linguistic-features#accessing-ner) into entity offsets.
@@ -576,7 +576,7 @@ Encode per-token tags following the
 > #### Example
 >
 > ```python
-> from spacy.gold import offsets_from_biluo_tags
+> from spacy.training import offsets_from_biluo_tags
 >
 > doc = nlp("I like London.")
 > tags = ["O", "O", "U-LOC", "O"]
@@ -590,7 +590,7 @@ Encode per-token tags following the
 | `entities` | A sequence of [BILUO](/usage/linguistic-features#accessing-ner) tags with each tag describing one token. Each tag string will be of the form of either `""`, `"O"` or `"{action}-{label}"`, where action is one of `"B"`, `"I"`, `"L"`, `"U"`. ~~List[str]~~ |
 | **RETURNS** | A sequence of `(start, end, label)` triples. `start` and `end` will be character-offset integers denoting the slice into the original string. ~~List[Tuple[int, int, str]]~~ |
 
-### gold.spans_from_biluo_tags {#spans_from_biluo_tags tag="function" new="2.1"}
+### training.spans_from_biluo_tags {#spans_from_biluo_tags tag="function" new="2.1"}
 
 Encode per-token tags following the
 [BILUO scheme](/usage/linguistic-features#accessing-ner) into
@@ -600,7 +600,7 @@ token-based tags, e.g. to overwrite the `doc.ents`.
 > #### Example
 >
 > ```python
-> from spacy.gold import spans_from_biluo_tags
+> from spacy.training import spans_from_biluo_tags
 >
 > doc = nlp("I like London.")
 > tags = ["O", "O", "U-LOC", "O"]
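
The renamed helpers behave as before; only the import path changes. Combining the updated doc examples above into one self-contained round trip (a sketch; `English()` stands in for whichever pipeline you use):

```python
from spacy.lang.en import English
from spacy.training import biluo_tags_from_offsets, offsets_from_biluo_tags

nlp = English()
doc = nlp("I like London.")
entities = [(7, 13, "LOC")]

# Character offsets -> per-token BILUO tags, and back again.
tags = biluo_tags_from_offsets(doc, entities)
assert tags == ["O", "O", "U-LOC", "O"]
assert offsets_from_biluo_tags(doc, tags) == entities
```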

@@ -62,9 +62,9 @@ on the transformer architectures and their arguments and hyperparameters.
 > ```
 
 | Setting | Description |
-| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| ----------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `max_batch_items` | Maximum size of a padded batch. Defaults to `4096`. ~~int~~ |
-| `annotation_setter` | Function that takes a batch of `Doc` objects and transformer outputs to set additional annotations on the `Doc`. The `Doc._.transformer_data` attribute is set prior to calling the callback. Defaults to `null_annotation_setter` (no additional annotations). ~~Callable[[List[Doc], FullTransformerBatch], None]~~ |
+| `set_extra_annotations` | Function that takes a batch of `Doc` objects and transformer outputs to set additional annotations on the `Doc`. The `Doc._.transformer_data` attribute is set prior to calling the callback. Defaults to `null_annotation_setter` (no additional annotations). ~~Callable[[List[Doc], FullTransformerBatch], None]~~ |
 | `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. Defaults to [TransformerModel](/api/architectures#TransformerModel). ~~Model[List[Doc], FullTransformerBatch]~~ |
 
 ```python
@@ -103,10 +103,10 @@ your application, you would normally use a shortcut for this and instantiate the
 component using its string name and [`nlp.add_pipe`](/api/language#create_pipe).
 
 | Name | Description |
-| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `vocab` | The shared vocabulary. ~~Vocab~~ |
 | `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) wrapping the transformer. Usually you will want to use the [TransformerModel](/api/architectures#TransformerModel) layer for this. ~~Model[List[Doc], FullTransformerBatch]~~ |
-| `annotation_setter` | Function that takes a batch of `Doc` objects and transformer outputs and stores the annotations on the `Doc`. The `Doc._.trf_data` attribute is set prior to calling the callback. By default, no additional annotations are set. ~~Callable[[List[Doc], FullTransformerBatch], None]~~ |
+| `set_extra_annotations` | Function that takes a batch of `Doc` objects and transformer outputs and stores the annotations on the `Doc`. The `Doc._.trf_data` attribute is set prior to calling the callback. By default, no additional annotations are set. ~~Callable[[List[Doc], FullTransformerBatch], None]~~ |
 | _keyword-only_ | |
 | `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
 | `max_batch_items` | Maximum size of a padded batch. Defaults to `128*32`. ~~int~~ |
@@ -205,7 +205,7 @@ modifying them.
 Assign the extracted features to the Doc objects. By default, the
 [`TransformerData`](/api/transformer#transformerdata) object is written to the
-[`Doc._.trf_data`](#custom-attributes) attribute. Your annotation_setter
+[`Doc._.trf_data`](#custom-attributes) attribute. Your `set_extra_annotations`
 callback is then called, if provided.
 
 > #### Example

@@ -253,10 +253,10 @@ for doc in nlp.pipe(["some text", "some other text"]):
 You can also customize how the [`Transformer`](/api/transformer) component sets
 annotations onto the [`Doc`](/api/doc), by specifying a custom
-`annotation_setter`. This callback will be called with the raw input and output
-data for the whole batch, along with the batch of `Doc` objects, allowing you to
-implement whatever you need. The annotation setter is called with a batch of
-[`Doc`](/api/doc) objects and a
+`set_extra_annotations` function. This callback will be called with the raw
+input and output data for the whole batch, along with the batch of `Doc`
+objects, allowing you to implement whatever you need. The annotation setter is
+called with a batch of [`Doc`](/api/doc) objects and a
 [`FullTransformerBatch`](/api/transformer#fulltransformerbatch) containing the
 transformers data for the batch.
@@ -267,7 +267,7 @@ def custom_annotation_setter(docs, trf_data):
         doc._.custom_attr = data
 
 nlp = spacy.load("en_core_trf_lg")
-nlp.get_pipe("transformer").annotation_setter = custom_annotation_setter
+nlp.get_pipe("transformer").set_extra_annotations = custom_annotation_setter
 doc = nlp("This is a text")
 assert isinstance(doc._.custom_attr, TransformerData)
 print(doc._.custom_attr.tensors)
@@ -314,7 +314,7 @@ component:
 >     get_spans=get_doc_spans,
 >     tokenizer_config={"use_fast": True},
 > ),
-> set_extra_annotations=null_annotation_setter,
+> set_extra_annotations=null_annotation_setter,
 > max_batch_items=4096,
 > )
 > ```
@@ -333,7 +333,7 @@ tokenizer_config = {"use_fast": true}
 [components.transformer.model.get_spans]
 @span_getters = "spacy-transformers.doc_spans.v1"
 
-[components.transformer.annotation_setter]
+[components.transformer.set_extra_annotations]
 @annotation_setters = "spacy-transformers.null_annotation_setter.v1"
 ```
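
Pieced together, the updated usage snippet above amounts to something like the following (a sketch only: `en_core_trf_lg` and `custom_attr` are the placeholder names from the docs, and iterating `trf_data.doc_data` for each doc's share of the batch output is an assumption about the spacy-transformers API):

```python
import spacy
from spacy.tokens import Doc

# Hypothetical extension attribute, as in the docs example above.
Doc.set_extension("custom_attr", default=None)

def custom_annotation_setter(docs, trf_data):
    # Store each doc's slice of the transformer output on a custom attribute.
    for doc, data in zip(docs, trf_data.doc_data):
        doc._.custom_attr = data

nlp = spacy.load("en_core_trf_lg")  # placeholder pipeline name from the docs
nlp.get_pipe("transformer").set_extra_annotations = custom_annotation_setter
doc = nlp("This is a text")
print(doc._.custom_attr.tensors)
```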

@@ -1366,7 +1366,7 @@ token.
 ```python
 ### {executable="true"}
-from spacy.gold import Alignment
+from spacy.training import Alignment
 
 other_tokens = ["i", "listened", "to", "obama", "'", "s", "podcasts", "."]
 spacy_tokens = ["i", "listened", "to", "obama", "'s", "podcasts", "."]

@@ -1500,7 +1500,7 @@ add those entities to the `doc.ents`, you can wrap it in a custom pipeline
 component function and pass it the token texts from the `Doc` object received by
 the component.
 
-The [`gold.spans_from_biluo_tags`](/api/top-level#spans_from_biluo_tags) is very
+The [`training.spans_from_biluo_tags`](/api/top-level#spans_from_biluo_tags) is very
 helpful here, because it takes a `Doc` object and token-based BILUO tags and
 returns a sequence of `Span` objects in the `Doc` with added labels. So all your
 wrapper has to do is compute the entity spans and overwrite the `doc.ents`.
@@ -1515,7 +1515,7 @@ wrapper has to do is compute the entity spans and overwrite the `doc.ents`.
 ```python
 ### {highlight="1,8-9"}
 import your_custom_entity_recognizer
-from spacy.gold import offsets_from_biluo_tags
+from spacy.training import offsets_from_biluo_tags
 from spacy.language import Language
 
 @Language.component("custom_ner_wrapper")

@@ -735,7 +735,7 @@ as **config settings** in this case, `source`.
 ### functions.py {highlight="7-8"}
 from typing import Callable, Iterator, List
 import spacy
-from spacy.gold import Example
+from spacy.training import Example
 from spacy.language import Language
 import random
@@ -783,7 +783,7 @@ annotations are the same.
 ### functions.py
 from typing import Callable, Iterable, Iterator, List
 import spacy
-from spacy.gold import Example
+from spacy.training import Example
 
 @spacy.registry.batchers("filtering_batch.v1")
 def filter_batch(size: int) -> Callable[[Iterable[Example]], Iterator[List[Example]]]: