Mirror of https://github.com/explosion/spaCy.git (synced 2025-07-10 16:22:29 +03:00)

Merge branch 'explosion:master' into feature/candidate-generation-by-docs

Commit 7a4ef51807
.github/azure-steps.yml (75 changed lines)

@@ -1,9 +1,7 @@
 parameters:
   python_version: ''
-  architecture: ''
-  prefix: ''
-  gpu: false
-  num_build_jobs: 1
+  architecture: 'x64'
+  num_build_jobs: 2
 
 steps:
   - task: UsePythonVersion@0
@@ -17,16 +15,16 @@ steps:
     displayName: 'Set variables'
 
   - script: |
-      ${{ parameters.prefix }} python -m pip install -U pip setuptools
-      ${{ parameters.prefix }} python -m pip install -U -r requirements.txt
+      python -m pip install -U build pip setuptools
+      python -m pip install -U -r requirements.txt
     displayName: "Install dependencies"
 
   - script: |
-      ${{ parameters.prefix }} python setup.py build_ext --inplace -j ${{ parameters.num_build_jobs }}
-      ${{ parameters.prefix }} python setup.py sdist --formats=gztar
-    displayName: "Compile and build sdist"
+      python -m build --sdist
+    displayName: "Build sdist"
 
-  - script: python -m mypy spacy
+  - script: |
+      python -m mypy spacy
     displayName: 'Run mypy'
     condition: ne(variables['python_version'], '3.6')
@@ -35,35 +33,24 @@ steps:
       contents: "spacy"
     displayName: "Delete source directory"
 
+  - task: DeleteFiles@1
+    inputs:
+      contents: "*.egg-info"
+    displayName: "Delete egg-info directory"
+
   - script: |
-      ${{ parameters.prefix }} python -m pip freeze --exclude torch --exclude cupy-cuda110 > installed.txt
-      ${{ parameters.prefix }} python -m pip uninstall -y -r installed.txt
+      python -m pip freeze > installed.txt
+      python -m pip uninstall -y -r installed.txt
    displayName: "Uninstall all packages"
 
   - bash: |
-      ${{ parameters.prefix }} SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
-      ${{ parameters.prefix }} SPACY_NUM_BUILD_JOBS=2 python -m pip install dist/$SDIST
+      SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
+      SPACY_NUM_BUILD_JOBS=${{ parameters.num_build_jobs }} python -m pip install dist/$SDIST
     displayName: "Install from sdist"
 
-  - script: |
-      ${{ parameters.prefix }} python -m pip install -U -r requirements.txt
-    displayName: "Install test requirements"
-
-  - script: |
-      ${{ parameters.prefix }} python -m pip install -U cupy-cuda110 -f https://github.com/cupy/cupy/releases/v9.0.0
-      ${{ parameters.prefix }} python -m pip install "torch==1.7.1+cu110" -f https://download.pytorch.org/whl/torch_stable.html
-    displayName: "Install GPU requirements"
-    condition: eq(${{ parameters.gpu }}, true)
-
-  - script: |
-      ${{ parameters.prefix }} python -m pytest --pyargs spacy -W error
-    displayName: "Run CPU tests"
-    condition: eq(${{ parameters.gpu }}, false)
-
-  - script: |
-      ${{ parameters.prefix }} python -m pytest --pyargs spacy -W error -p spacy.tests.enable_gpu
-    displayName: "Run GPU tests"
-    condition: eq(${{ parameters.gpu }}, true)
+  - script: |
+      python -W error -c "import spacy"
+    displayName: "Test import"
 
   - script: |
       python -m spacy download ca_core_news_sm
@@ -72,6 +59,11 @@ steps:
     displayName: 'Test download CLI'
     condition: eq(variables['python_version'], '3.8')
 
+  - script: |
+      python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
+    displayName: 'Test no warnings on load (#11713)'
+    condition: eq(variables['python_version'], '3.8')
+
   - script: |
       python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
     displayName: 'Test convert CLI'
@@ -106,13 +98,22 @@ steps:
     displayName: 'Test assemble CLI vectors warning'
     condition: eq(variables['python_version'], '3.8')
 
+  - script: |
+      python -m pip install -U -r requirements.txt
+    displayName: "Install test requirements"
+
+  - script: |
+      python -m pytest --pyargs spacy -W error
+    displayName: "Run CPU tests"
+
+  - script: |
+      python -m pip install --pre thinc-apple-ops
+      python -m pytest --pyargs spacy
+    displayName: "Run CPU tests with thinc-apple-ops"
+    condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.11'))
+
   - script: |
       python .github/validate_universe_json.py website/meta/universe.json
     displayName: 'Test website/meta/universe.json'
     condition: eq(variables['python_version'], '3.8')
-
-  - script: |
-      ${{ parameters.prefix }} python -m pip install --pre thinc-apple-ops
-      ${{ parameters.prefix }} python -m pytest --pyargs spacy
-    displayName: "Run CPU tests with thinc-apple-ops"
-    condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.10'))
.github/workflows/autoblack.yml (2 changed lines)

@@ -15,7 +15,7 @@ jobs:
       - uses: actions/checkout@v3
         with:
           ref: ${{ github.head_ref }}
-      - uses: actions/setup-python@v3
+      - uses: actions/setup-python@v4
       - run: pip install black
       - name: Auto-format code if needed
         run: black spacy
.github/workflows/explosionbot.yml (6 changed lines)

@@ -8,14 +8,14 @@ on:
 
 jobs:
   explosion-bot:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-latest
     steps:
       - name: Dump GitHub context
         env:
           GITHUB_CONTEXT: ${{ toJson(github) }}
         run: echo "$GITHUB_CONTEXT"
-      - uses: actions/checkout@v1
-      - uses: actions/setup-python@v1
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
       - name: Install and run explosion-bot
         run: |
           pip install git+https://${{ secrets.EXPLOSIONBOT_TOKEN }}@github.com/explosion/explosion-bot
.github/workflows/slowtests.yml (6 changed lines)

@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v1
+        uses: actions/checkout@v3
         with:
           ref: ${{ matrix.branch }}
       - name: Get commits from past 24 hours
@@ -23,9 +23,9 @@ jobs:
           today=$(date '+%Y-%m-%d %H:%M:%S')
           yesterday=$(date -d "yesterday" '+%Y-%m-%d %H:%M:%S')
           if git log --after="$yesterday" --before="$today" | grep commit ; then
-            echo "::set-output name=run_tests::true"
+            echo run_tests=true >> $GITHUB_OUTPUT
           else
-            echo "::set-output name=run_tests::false"
+            echo run_tests=false >> $GITHUB_OUTPUT
           fi
       - name: Trigger buildkite build
.github/workflows/spacy_universe_alert.yml (4 changed lines)

@@ -17,8 +17,8 @@ jobs:
         run: |
           echo "$GITHUB_CONTEXT"
 
-      - uses: actions/checkout@v1
-      - uses: actions/setup-python@v1
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
       - name: Install Bernadette app dependency and send an alert
         env:
           SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
azure-pipelines.yml

@@ -87,13 +87,13 @@ jobs:
           # python.version: "3.10"
           Python311Linux:
             imageName: 'ubuntu-latest'
-            python.version: '3.11.0-rc.2'
+            python.version: '3.11.0'
           Python311Windows:
             imageName: 'windows-latest'
-            python.version: '3.11.0-rc.2'
+            python.version: '3.11.0'
           Python311Mac:
             imageName: 'macos-latest'
-            python.version: '3.11.0-rc.2'
+            python.version: '3.11.0'
       maxParallel: 4
     pool:
       vmImage: $(imageName)
@@ -101,20 +101,3 @@ jobs:
     - template: .github/azure-steps.yml
       parameters:
         python_version: '$(python.version)'
        architecture: 'x64'
-
-# - job: "TestGPU"
-#   dependsOn: "Validate"
-#   strategy:
-#     matrix:
-#       Python38LinuxX64_GPU:
-#         python.version: '3.8'
-#   pool:
-#     name: "LinuxX64_GPU"
-#   steps:
-#     - template: .github/azure-steps.yml
-#       parameters:
-#         python_version: '$(python.version)'
-#         architecture: 'x64'
-#         gpu: true
-#         num_build_jobs: 24
spacy/pipeline/textcat.py

@@ -72,7 +72,7 @@ subword_features = true
     "textcat",
     assigns=["doc.cats"],
     default_config={
-        "threshold": 0.5,
+        "threshold": 0.0,
         "model": DEFAULT_SINGLE_TEXTCAT_MODEL,
         "scorer": {"@scorers": "spacy.textcat_scorer.v1"},
     },
@@ -144,7 +144,8 @@ class TextCategorizer(TrainablePipe):
         model (thinc.api.Model): The Thinc Model powering the pipeline component.
         name (str): The component instance name, used to add entries to the
             losses during training.
-        threshold (float): Cutoff to consider a prediction "positive".
+        threshold (float): Unused, not needed for single-label (exclusive
+            classes) classification.
         scorer (Optional[Callable]): The scoring method. Defaults to
             Scorer.score_cats for the attribute "cats".
 
@@ -154,7 +155,11 @@ class TextCategorizer(TrainablePipe):
         self.model = model
         self.name = name
         self._rehearsal_model = None
-        cfg = {"labels": [], "threshold": threshold, "positive_label": None}
+        cfg: Dict[str, Any] = {
+            "labels": [],
+            "threshold": threshold,
+            "positive_label": None,
+        }
         self.cfg = dict(cfg)
         self.scorer = scorer
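Note on the `threshold` default change above: for the single-label `textcat` component the cutoff no longer affects scoring, while `textcat_multilabel` keeps using it. A minimal sketch of how the two components are configured under this change (the blank pipelines here are illustrative, not part of the diff):

```python
import spacy

# Single-label textcat: with this change, "threshold" is effectively
# unused; the highest-scoring label is always taken as the prediction.
nlp_single = spacy.blank("en")
nlp_single.add_pipe("textcat", config={"threshold": 0.0})

# Multi-label textcat: the threshold still decides which labels count
# as "positive" when scoring.
nlp_multi = spacy.blank("en")
nlp_multi.add_pipe("textcat_multilabel", config={"threshold": 0.5})
```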
spacy/scorer.py

@@ -446,7 +446,7 @@ class Scorer:
         labels (Iterable[str]): The set of possible labels. Defaults to [].
         multi_label (bool): Whether the attribute allows multiple labels.
             Defaults to True. When set to False (exclusive labels), missing
-            gold labels are interpreted as 0.0.
+            gold labels are interpreted as 0.0 and the threshold is set to 0.0.
         positive_label (str): The positive label for a binary task with
             exclusive classes. Defaults to None.
         threshold (float): Cutoff to consider a prediction "positive". Defaults
@@ -471,6 +471,8 @@ class Scorer:
         """
         if threshold is None:
             threshold = 0.5 if multi_label else 0.0
+        if not multi_label:
+            threshold = 0.0
         f_per_type = {label: PRFScore() for label in labels}
         auc_per_type = {label: ROCAUCScore() for label in labels}
         labels = set(labels)
@@ -505,11 +507,10 @@ class Scorer:
                 # Get the highest-scoring for each.
                 pred_label, pred_score = max(pred_cats.items(), key=lambda it: it[1])
                 gold_label, gold_score = max(gold_cats.items(), key=lambda it: it[1])
-                if pred_label == gold_label and pred_score >= threshold:
+                if pred_label == gold_label:
                     f_per_type[pred_label].tp += 1
                 else:
                     f_per_type[gold_label].fn += 1
-                    if pred_score >= threshold:
-                        f_per_type[pred_label].fp += 1
+                    f_per_type[pred_label].fp += 1
             elif gold_cats:
                 gold_label, gold_score = max(gold_cats.items(), key=lambda it: it[1])
@@ -517,7 +518,6 @@ class Scorer:
                     f_per_type[gold_label].fn += 1
             elif pred_cats:
                 pred_label, pred_score = max(pred_cats.items(), key=lambda it: it[1])
-                if pred_score >= threshold:
-                    f_per_type[pred_label].fp += 1
+                f_per_type[pred_label].fp += 1
             micro_prf = PRFScore()
             for label_prf in f_per_type.values():
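The scoring change above reduces the exclusive-label case to a plain argmax comparison. A minimal standalone sketch of the new rule, with plain dicts standing in for `doc.cats`:

```python
# Sketch of the updated exclusive-label (multi_label=False) scoring:
# compare the top-scoring predicted label to the top gold label
# directly; the threshold no longer gates tp/fn/fp bookkeeping.
pred_cats = {"POSITIVE": 0.75, "NEGATIVE": 0.25}
gold_cats = {"POSITIVE": 1.0, "NEGATIVE": 0.0}

pred_label, pred_score = max(pred_cats.items(), key=lambda it: it[1])
gold_label, gold_score = max(gold_cats.items(), key=lambda it: it[1])

if pred_label == gold_label:
    print("true positive for", pred_label)
else:
    print("false negative for", gold_label, "and false positive for", pred_label)
```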
spacy/tests/doc/test_json_doc_conversion.py

@@ -370,3 +370,12 @@ def test_json_to_doc_validation_error(doc):
     doc_json.pop("tokens")
     with pytest.raises(ValueError):
         Doc(doc.vocab).from_json(doc_json, validate=True)
+
+
+def test_to_json_underscore_doc_getters(doc):
+    def get_text_length(doc):
+        return len(doc.text)
+
+    Doc.set_extension("text_length", getter=get_text_length)
+    doc_json = doc.to_json(underscore=["text_length"])
+    assert doc_json["_"]["text_length"] == get_text_length(doc)
spacy/tests/pipeline/test_textcat.py

@@ -823,10 +823,10 @@ def test_textcat_loss(multi_label: bool, expected_loss: float):
     assert loss == expected_loss
 
 
-def test_textcat_threshold():
+def test_textcat_multilabel_threshold():
     # Ensure the scorer can be called with a different threshold
     nlp = English()
-    nlp.add_pipe("textcat")
+    nlp.add_pipe("textcat_multilabel")
 
     train_examples = []
     for text, annotations in TRAIN_DATA_SINGLE_LABEL:
@@ -849,7 +849,7 @@ def test_textcat_multilabel_threshold():
     )
     pos_f = scores["cats_score"]
     assert scores["cats_f_per_type"]["POSITIVE"]["r"] == 1.0
-    assert pos_f > macro_f
+    assert pos_f >= macro_f
 
 
 def test_textcat_multi_threshold():
spacy/tests/test_scorer.py

@@ -474,3 +474,50 @@ def test_prf_score():
     assert (a.precision, a.recall, a.fscore) == approx(
         (c.precision, c.recall, c.fscore)
     )
+
+
+def test_score_cats(en_tokenizer):
+    text = "some text"
+    gold_doc = en_tokenizer(text)
+    gold_doc.cats = {"POSITIVE": 1.0, "NEGATIVE": 0.0}
+    pred_doc = en_tokenizer(text)
+    pred_doc.cats = {"POSITIVE": 0.75, "NEGATIVE": 0.25}
+    example = Example(pred_doc, gold_doc)
+    # threshold is ignored for multi_label=False
+    scores1 = Scorer.score_cats(
+        [example],
+        "cats",
+        labels=list(gold_doc.cats.keys()),
+        multi_label=False,
+        positive_label="POSITIVE",
+        threshold=0.1,
+    )
+    scores2 = Scorer.score_cats(
+        [example],
+        "cats",
+        labels=list(gold_doc.cats.keys()),
+        multi_label=False,
+        positive_label="POSITIVE",
+        threshold=0.9,
+    )
+    assert scores1["cats_score"] == 1.0
+    assert scores2["cats_score"] == 1.0
+    assert scores1 == scores2
+    # threshold is relevant for multi_label=True
+    scores = Scorer.score_cats(
+        [example],
+        "cats",
+        labels=list(gold_doc.cats.keys()),
+        multi_label=True,
+        threshold=0.9,
+    )
+    assert scores["cats_macro_f"] == 0.0
+    # threshold is relevant for multi_label=True
+    scores = Scorer.score_cats(
+        [example],
+        "cats",
+        labels=list(gold_doc.cats.keys()),
+        multi_label=True,
+        threshold=0.1,
+    )
+    assert scores["cats_macro_f"] == 0.5
spacy/tokens/doc.pyx

@@ -1668,6 +1668,20 @@ cdef class Doc:
 
         if underscore:
             user_keys = set()
+            # Handle doc attributes with .get to include values from getters
+            # and not only values stored in user_data, for backwards
+            # compatibility
+            for attr in underscore:
+                if self.has_extension(attr):
+                    if "_" not in data:
+                        data["_"] = {}
+                    value = self._.get(attr)
+                    if not srsly.is_json_serializable(value):
+                        raise ValueError(Errors.E107.format(attr=attr, value=repr(value)))
+                    data["_"][attr] = value
+                    user_keys.add(attr)
+            # Token and span attributes only include values stored in user_data
+            # and not values generated by getters
             if self.user_data:
                 for data_key, value in self.user_data.copy().items():
                     if type(data_key) == tuple and len(data_key) >= 4 and data_key[0] == "._.":
@@ -1678,20 +1692,15 @@ cdef class Doc:
                         user_keys.add(attr)
                         if not srsly.is_json_serializable(value):
                             raise ValueError(Errors.E107.format(attr=attr, value=repr(value)))
-                        # Check if doc attribute
-                        if start is None:
-                            if "_" not in data:
-                                data["_"] = {}
-                            data["_"][attr] = value
-                        # Check if token attribute
-                        elif end is None:
+                        # Token attribute
+                        if start is not None and end is None:
                             if "underscore_token" not in data:
                                 data["underscore_token"] = {}
                             if attr not in data["underscore_token"]:
                                 data["underscore_token"][attr] = []
                             data["underscore_token"][attr].append({"start": start, "value": value})
-                        # Else span attribute
-                        else:
+                        # Span attribute
+                        elif start is not None and end is not None:
                             if "underscore_span" not in data:
                                 data["underscore_span"] = {}
                             if attr not in data["underscore_span"]:
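The `Doc.to_json` change above means doc-level extension values computed by getters are now serialized too, not only values stored in `user_data`. A small usage sketch consistent with the new test in `test_json_doc_conversion.py` (the `n_tokens` extension name is illustrative):

```python
import spacy
from spacy.tokens import Doc

nlp = spacy.blank("en")
doc = nlp("hello world")

# A doc extension backed by a getter: its value never lives in
# user_data, but with the change above it is still serialized.
Doc.set_extension("n_tokens", getter=lambda d: len(d))
data = doc.to_json(underscore=["n_tokens"])
assert data["_"]["n_tokens"] == 2
```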
spacy/util.py

@@ -443,9 +443,9 @@ def load_model_from_package(
     name: str,
     *,
     vocab: Union["Vocab", bool] = True,
-    disable: Union[str, Iterable[str]] = SimpleFrozenList(),
-    enable: Union[str, Iterable[str]] = SimpleFrozenList(),
-    exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
+    disable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
+    enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
+    exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
     config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
 ) -> "Language":
     """Load a model from an installed package.
@@ -619,9 +619,9 @@ def load_model_from_init_py(
     init_file: Union[Path, str],
     *,
     vocab: Union["Vocab", bool] = True,
-    disable: Union[str, Iterable[str]] = SimpleFrozenList(),
-    enable: Union[str, Iterable[str]] = SimpleFrozenList(),
-    exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
+    disable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
+    enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
+    exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES,
     config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
 ) -> "Language":
     """Helper function to use in the `load()` method of a model package's
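The `_DEFAULT_EMPTY_PIPES` swap above presumably replaces per-signature `SimpleFrozenList()` defaults with one shared module-level default, so callees can use an identity check to tell whether the caller actually passed a value. A hypothetical sketch of that pattern (names and the tuple stand-in are illustrative, not spaCy's actual implementation):

```python
from typing import Iterable, Union

# One shared immutable default instead of a fresh object per signature.
_DEFAULT_EMPTY_PIPES: Iterable[str] = ()

def load_model(disable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES) -> bool:
    # Identity check: True only when the caller supplied their own value,
    # even if that value is also an empty sequence.
    return disable is not _DEFAULT_EMPTY_PIPES

assert load_model() is False
assert load_model(disable=[]) is True
```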
website/docs/api/scorer.md

@@ -230,14 +230,15 @@ The reported `{attr}_score` depends on the classification properties:
 > ```
 
 | Name             | Description |
-| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- |
+| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `examples`       | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
 | `attr`           | The attribute to score. ~~str~~ |
 | _keyword-only_   | |
 | `getter`         | Defaults to `getattr`. If provided, `getter(doc, attr)` should return the cats for an individual `Doc`. ~~Callable[[Doc, str], Dict[str, float]]~~ |
 | `labels`         | The set of possible labels. Defaults to `[]`. ~~Iterable[str]~~ |
-| `multi_label`    | Whether the attribute allows multiple labels. Defaults to `True`. ~~bool~~ |
+| `multi_label`    | Whether the attribute allows multiple labels. Defaults to `True`. When set to `False` (exclusive labels), missing gold labels are interpreted as `0.0` and the threshold is set to `0.0`. ~~bool~~ |
 | `positive_label` | The positive label for a binary task with exclusive classes. Defaults to `None`. ~~Optional[str]~~ |
 | `threshold`      | Cutoff to consider a prediction "positive". Defaults to `0.5` for multi-label, and `0.0` (i.e. whatever's highest scoring) otherwise. ~~float~~ |
 | **RETURNS**      | A dictionary containing the scores, with inapplicable scores as `None`. ~~Dict[str, Optional[float]]~~ |
 
 ## Scorer.score_links {#score_links tag="staticmethod" new="3"}
website/docs/api/textcategorizer.md

@@ -63,7 +63,6 @@ architectures and their arguments and hyperparameters.
 > ```python
 > from spacy.pipeline.textcat import DEFAULT_SINGLE_TEXTCAT_MODEL
 > config = {
->     "threshold": 0.5,
 >     "model": DEFAULT_SINGLE_TEXTCAT_MODEL,
 > }
 > nlp.add_pipe("textcat", config=config)
@@ -82,7 +81,7 @@ architectures and their arguments and hyperparameters.
 
 | Setting     | Description |
 | ----------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ |
+| `threshold` | Cutoff to consider a prediction "positive", relevant for `textcat_multilabel` when calculating accuracy scores. ~~float~~ |
 | `model`     | A model instance that predicts scores for each category. Defaults to [TextCatEnsemble](/api/architectures#TextCatEnsemble). ~~Model[List[Doc], List[Floats2d]]~~ |
 | `scorer`    | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ |
@@ -123,7 +122,7 @@ shortcut for this and instantiate the component using its string name and
 | `model`        | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ |
 | `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ |
 | _keyword-only_ | |
-| `threshold`    | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ |
+| `threshold`    | Cutoff to consider a prediction "positive", relevant for `textcat_multilabel` when calculating accuracy scores. ~~float~~ |
 | `scorer`       | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ |
 
 ## TextCategorizer.\_\_call\_\_ {#call tag="method"}
website/docs/usage/rule-based-matching.md

@@ -1792,7 +1792,7 @@ the entity `Span` – for example `._.orgs` or `._.prev_orgs` and
 > [`Doc.retokenize`](/api/doc#retokenize) context manager:
 >
 > ```python
-> with doc.retokenize() as retokenize:
+> with doc.retokenize() as retokenizer:
 >     for ent in doc.ents:
 >         retokenizer.merge(ent)
 > ```
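The docs fix above aligns the context-manager variable with the `retokenizer.merge` call inside the block. As a runnable check of the corrected pattern (a small blank pipeline and hand-set entity assumed here):

```python
import spacy

nlp = spacy.blank("en")
doc = nlp("San Francisco is foggy")
doc.ents = [doc.char_span(0, 13, label="GPE")]

# Merge each entity span into a single token, as in the corrected docs.
with doc.retokenize() as retokenizer:
    for ent in doc.ents:
        retokenizer.merge(ent)

assert [t.text for t in doc] == ["San Francisco", "is", "foggy"]
```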