diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml index cc0247b3a..e8bd0d212 100644 --- a/.github/azure-steps.yml +++ b/.github/azure-steps.yml @@ -1,9 +1,7 @@ parameters: python_version: '' - architecture: '' - prefix: '' - gpu: false - num_build_jobs: 1 + architecture: 'x64' + num_build_jobs: 2 steps: - task: UsePythonVersion@0 @@ -17,16 +15,16 @@ steps: displayName: 'Set variables' - script: | - ${{ parameters.prefix }} python -m pip install -U pip setuptools - ${{ parameters.prefix }} python -m pip install -U -r requirements.txt + python -m pip install -U build pip setuptools + python -m pip install -U -r requirements.txt displayName: "Install dependencies" - script: | - ${{ parameters.prefix }} python setup.py build_ext --inplace -j ${{ parameters.num_build_jobs }} - ${{ parameters.prefix }} python setup.py sdist --formats=gztar - displayName: "Compile and build sdist" + python -m build --sdist + displayName: "Build sdist" - - script: python -m mypy spacy + - script: | + python -m mypy spacy displayName: 'Run mypy' condition: ne(variables['python_version'], '3.6') @@ -35,35 +33,24 @@ steps: contents: "spacy" displayName: "Delete source directory" + - task: DeleteFiles@1 + inputs: + contents: "*.egg-info" + displayName: "Delete egg-info directory" + - script: | - ${{ parameters.prefix }} python -m pip freeze --exclude torch --exclude cupy-cuda110 > installed.txt - ${{ parameters.prefix }} python -m pip uninstall -y -r installed.txt + python -m pip freeze > installed.txt + python -m pip uninstall -y -r installed.txt displayName: "Uninstall all packages" - bash: | - ${{ parameters.prefix }} SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1) - ${{ parameters.prefix }} SPACY_NUM_BUILD_JOBS=2 python -m pip install dist/$SDIST + SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1) + SPACY_NUM_BUILD_JOBS=${{ parameters.num_build_jobs }} python -m pip install dist/$SDIST displayName: "Install from sdist" - script: | - ${{ parameters.prefix }} python -m pip install -U -r requirements.txt - displayName: "Install test requirements" - - - script: | - ${{ parameters.prefix }} python -m pip install -U cupy-cuda110 -f https://github.com/cupy/cupy/releases/v9.0.0 - ${{ parameters.prefix }} python -m pip install "torch==1.7.1+cu110" -f https://download.pytorch.org/whl/torch_stable.html - displayName: "Install GPU requirements" - condition: eq(${{ parameters.gpu }}, true) - - - script: | - ${{ parameters.prefix }} python -m pytest --pyargs spacy -W error - displayName: "Run CPU tests" - condition: eq(${{ parameters.gpu }}, false) - - - script: | - ${{ parameters.prefix }} python -m pytest --pyargs spacy -W error -p spacy.tests.enable_gpu - displayName: "Run GPU tests" - condition: eq(${{ parameters.gpu }}, true) + python -W error -c "import spacy" + displayName: "Test import" - script: | python -m spacy download ca_core_news_sm @@ -72,6 +59,11 @@ steps: displayName: 'Test download CLI' condition: eq(variables['python_version'], '3.8') + - script: | + python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')" + displayName: 'Test no warnings on load (#11713)' + condition: eq(variables['python_version'], '3.8') + - script: | python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json . displayName: 'Test convert CLI' @@ -106,13 +98,22 @@ steps: displayName: 'Test assemble CLI vectors warning' condition: eq(variables['python_version'], '3.8') + - script: | + python -m pip install -U -r requirements.txt + displayName: "Install test requirements" + + - script: | + python -m pytest --pyargs spacy -W error + displayName: "Run CPU tests" + + - script: | + python -m pip install --pre thinc-apple-ops + python -m pytest --pyargs spacy + displayName: "Run CPU tests with thinc-apple-ops" + condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.11')) + - script: | python .github/validate_universe_json.py website/meta/universe.json displayName: 'Test website/meta/universe.json' condition: eq(variables['python_version'], '3.8') - - script: | - ${{ parameters.prefix }} python -m pip install --pre thinc-apple-ops - ${{ parameters.prefix }} python -m pytest --pyargs spacy - displayName: "Run CPU tests with thinc-apple-ops" - condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.10')) diff --git a/.github/workflows/autoblack.yml b/.github/workflows/autoblack.yml index 3ad4cf408..70882c3cc 100644 --- a/.github/workflows/autoblack.yml +++ b/.github/workflows/autoblack.yml @@ -15,7 +15,7 @@ jobs: - uses: actions/checkout@v3 with: ref: ${{ github.head_ref }} - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v4 - run: pip install black - name: Auto-format code if needed run: black spacy diff --git a/.github/workflows/explosionbot.yml b/.github/workflows/explosionbot.yml index d585ecd9c..6b472cd12 100644 --- a/.github/workflows/explosionbot.yml +++ b/.github/workflows/explosionbot.yml @@ -8,14 +8,14 @@ on: jobs: explosion-bot: - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: - name: Dump GitHub context env: GITHUB_CONTEXT: ${{ toJson(github) }} run: echo "$GITHUB_CONTEXT" - - uses: actions/checkout@v1 - - uses: actions/setup-python@v1 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 - name: Install and run explosion-bot run: | pip install git+https://${{ secrets.EXPLOSIONBOT_TOKEN }}@github.com/explosion/explosion-bot diff --git a/.github/workflows/slowtests.yml b/.github/workflows/slowtests.yml index 38ceb18c6..f9fd3e817 100644 --- a/.github/workflows/slowtests.yml +++ b/.github/workflows/slowtests.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v1 + uses: actions/checkout@v3 with: ref: ${{ matrix.branch }} - name: Get commits from past 24 hours @@ -23,9 +23,9 @@ jobs: today=$(date '+%Y-%m-%d %H:%M:%S') yesterday=$(date -d "yesterday" '+%Y-%m-%d %H:%M:%S') if git log --after="$yesterday" --before="$today" | grep commit ; then - echo "::set-output name=run_tests::true" + echo run_tests=true >> $GITHUB_OUTPUT else - echo "::set-output name=run_tests::false" + echo run_tests=false >> $GITHUB_OUTPUT fi - name: Trigger buildkite build diff --git a/.github/workflows/spacy_universe_alert.yml b/.github/workflows/spacy_universe_alert.yml index cbbf14c6e..f507e0594 100644 --- a/.github/workflows/spacy_universe_alert.yml +++ b/.github/workflows/spacy_universe_alert.yml @@ -17,8 +17,8 @@ jobs: run: | echo "$GITHUB_CONTEXT" - - uses: actions/checkout@v1 - - uses: actions/setup-python@v1 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 - name: Install Bernadette app dependency and send an alert env: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} diff --git a/azure-pipelines.yml b/azure-pipelines.yml index eea07cb7a..3499042cb 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -87,13 +87,13 @@ jobs: # python.version: "3.10" Python311Linux: imageName: 'ubuntu-latest' - python.version: '3.11.0-rc.2' + python.version: '3.11.0' Python311Windows: imageName: 'windows-latest' - python.version: '3.11.0-rc.2' + python.version: '3.11.0' Python311Mac: imageName: 'macos-latest' - python.version: '3.11.0-rc.2' + python.version: '3.11.0' maxParallel: 4 pool: vmImage: $(imageName) @@ -101,20 +101,3 @@ jobs: - template: .github/azure-steps.yml parameters: python_version: '$(python.version)' - architecture: 'x64' - -# - job: "TestGPU" -# dependsOn: "Validate" -# strategy: -# matrix: -# Python38LinuxX64_GPU: -# python.version: '3.8' -# pool: -# name: "LinuxX64_GPU" -# steps: -# - template: .github/azure-steps.yml -# parameters: -# python_version: '$(python.version)' -# architecture: 'x64' -# gpu: true -# num_build_jobs: 24 diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index 59549ad99..4023c4456 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -72,7 +72,7 @@ subword_features = true "textcat", assigns=["doc.cats"], default_config={ - "threshold": 0.5, + "threshold": 0.0, "model": DEFAULT_SINGLE_TEXTCAT_MODEL, "scorer": {"@scorers": "spacy.textcat_scorer.v1"}, }, @@ -144,7 +144,8 @@ class TextCategorizer(TrainablePipe): model (thinc.api.Model): The Thinc Model powering the pipeline component. name (str): The component instance name, used to add entries to the losses during training. - threshold (float): Cutoff to consider a prediction "positive". + threshold (float): Unused, not needed for single-label (exclusive + classes) classification. scorer (Optional[Callable]): The scoring method. Defaults to Scorer.score_cats for the attribute "cats". @@ -154,7 +155,11 @@ class TextCategorizer(TrainablePipe): self.model = model self.name = name self._rehearsal_model = None - cfg = {"labels": [], "threshold": threshold, "positive_label": None} + cfg: Dict[str, Any] = { + "labels": [], + "threshold": threshold, + "positive_label": None, + } self.cfg = dict(cfg) self.scorer = scorer diff --git a/spacy/scorer.py b/spacy/scorer.py index 8cd755ac4..16fc303a0 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -446,7 +446,7 @@ class Scorer: labels (Iterable[str]): The set of possible labels. Defaults to []. multi_label (bool): Whether the attribute allows multiple labels. Defaults to True. When set to False (exclusive labels), missing - gold labels are interpreted as 0.0. + gold labels are interpreted as 0.0 and the threshold is set to 0.0. positive_label (str): The positive label for a binary task with exclusive classes. Defaults to None. threshold (float): Cutoff to consider a prediction "positive". Defaults @@ -471,6 +471,8 @@ class Scorer: """ if threshold is None: threshold = 0.5 if multi_label else 0.0 + if not multi_label: + threshold = 0.0 f_per_type = {label: PRFScore() for label in labels} auc_per_type = {label: ROCAUCScore() for label in labels} labels = set(labels) @@ -505,20 +507,18 @@ class Scorer: # Get the highest-scoring for each. pred_label, pred_score = max(pred_cats.items(), key=lambda it: it[1]) gold_label, gold_score = max(gold_cats.items(), key=lambda it: it[1]) - if pred_label == gold_label and pred_score >= threshold: + if pred_label == gold_label: f_per_type[pred_label].tp += 1 else: f_per_type[gold_label].fn += 1 - if pred_score >= threshold: - f_per_type[pred_label].fp += 1 + f_per_type[pred_label].fp += 1 elif gold_cats: gold_label, gold_score = max(gold_cats, key=lambda it: it[1]) if gold_score > 0: f_per_type[gold_label].fn += 1 elif pred_cats: pred_label, pred_score = max(pred_cats.items(), key=lambda it: it[1]) - if pred_score >= threshold: - f_per_type[pred_label].fp += 1 + f_per_type[pred_label].fp += 1 micro_prf = PRFScore() for label_prf in f_per_type.values(): micro_prf.tp += label_prf.tp diff --git a/spacy/tests/doc/test_json_doc_conversion.py b/spacy/tests/doc/test_json_doc_conversion.py index 19698cfb2..11a1817e6 100644 --- a/spacy/tests/doc/test_json_doc_conversion.py +++ b/spacy/tests/doc/test_json_doc_conversion.py @@ -370,3 +370,12 @@ def test_json_to_doc_validation_error(doc): doc_json.pop("tokens") with pytest.raises(ValueError): Doc(doc.vocab).from_json(doc_json, validate=True) + + +def test_to_json_underscore_doc_getters(doc): + def get_text_length(doc): + return len(doc.text) + + Doc.set_extension("text_length", getter=get_text_length) + doc_json = doc.to_json(underscore=["text_length"]) + assert doc_json["_"]["text_length"] == get_text_length(doc) diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py index 0bb036a33..d359b77db 100644 --- a/spacy/tests/pipeline/test_textcat.py +++ b/spacy/tests/pipeline/test_textcat.py @@ -823,10 +823,10 @@ def test_textcat_loss(multi_label: bool, expected_loss: float): assert loss == expected_loss -def test_textcat_threshold(): +def test_textcat_multilabel_threshold(): # Ensure the scorer can be called with a different threshold nlp = English() - nlp.add_pipe("textcat") + nlp.add_pipe("textcat_multilabel") train_examples = [] for text, annotations in TRAIN_DATA_SINGLE_LABEL: @@ -849,7 +849,7 @@ def test_textcat_threshold(): ) pos_f = scores["cats_score"] assert scores["cats_f_per_type"]["POSITIVE"]["r"] == 1.0 - assert pos_f > macro_f + assert pos_f >= macro_f def test_textcat_multi_threshold(): diff --git a/spacy/tests/test_scorer.py b/spacy/tests/test_scorer.py index 6e15fa2de..b903f1669 100644 --- a/spacy/tests/test_scorer.py +++ b/spacy/tests/test_scorer.py @@ -474,3 +474,50 @@ def test_prf_score(): assert (a.precision, a.recall, a.fscore) == approx( (c.precision, c.recall, c.fscore) ) + + +def test_score_cats(en_tokenizer): + text = "some text" + gold_doc = en_tokenizer(text) + gold_doc.cats = {"POSITIVE": 1.0, "NEGATIVE": 0.0} + pred_doc = en_tokenizer(text) + pred_doc.cats = {"POSITIVE": 0.75, "NEGATIVE": 0.25} + example = Example(pred_doc, gold_doc) + # threshold is ignored for multi_label=False + scores1 = Scorer.score_cats( + [example], + "cats", + labels=list(gold_doc.cats.keys()), + multi_label=False, + positive_label="POSITIVE", + threshold=0.1, + ) + scores2 = Scorer.score_cats( + [example], + "cats", + labels=list(gold_doc.cats.keys()), + multi_label=False, + positive_label="POSITIVE", + threshold=0.9, + ) + assert scores1["cats_score"] == 1.0 + assert scores2["cats_score"] == 1.0 + assert scores1 == scores2 + # threshold is relevant for multi_label=True + scores = Scorer.score_cats( + [example], + "cats", + labels=list(gold_doc.cats.keys()), + multi_label=True, + threshold=0.9, + ) + assert scores["cats_macro_f"] == 0.0 + # threshold is relevant for multi_label=True + scores = Scorer.score_cats( + [example], + "cats", + labels=list(gold_doc.cats.keys()), + multi_label=True, + threshold=0.1, + ) + assert scores["cats_macro_f"] == 0.5 diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 295f91c28..f2621292c 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -1668,6 +1668,20 @@ cdef class Doc: if underscore: user_keys = set() + # Handle doc attributes with .get to include values from getters + # and not only values stored in user_data, for backwards + # compatibility + for attr in underscore: + if self.has_extension(attr): + if "_" not in data: + data["_"] = {} + value = self._.get(attr) + if not srsly.is_json_serializable(value): + raise ValueError(Errors.E107.format(attr=attr, value=repr(value))) + data["_"][attr] = value + user_keys.add(attr) + # Token and span attributes only include values stored in user_data + # and not values generated by getters if self.user_data: for data_key, value in self.user_data.copy().items(): if type(data_key) == tuple and len(data_key) >= 4 and data_key[0] == "._.": @@ -1678,20 +1692,15 @@ cdef class Doc: user_keys.add(attr) if not srsly.is_json_serializable(value): raise ValueError(Errors.E107.format(attr=attr, value=repr(value))) - # Check if doc attribute - if start is None: - if "_" not in data: - data["_"] = {} - data["_"][attr] = value - # Check if token attribute - elif end is None: + # Token attribute + if start is not None and end is None: if "underscore_token" not in data: data["underscore_token"] = {} if attr not in data["underscore_token"]: data["underscore_token"][attr] = [] data["underscore_token"][attr].append({"start": start, "value": value}) - # Else span attribute - else: + # Span attribute + elif start is not None and end is not None: if "underscore_span" not in data: data["underscore_span"] = {} if attr not in data["underscore_span"]: diff --git a/spacy/util.py b/spacy/util.py index 3034808ba..76a1e0bfa 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -443,9 +443,9 @@ def load_model_from_package( name: str, *, vocab: Union["Vocab", bool] = True, - disable: Union[str, Iterable[str]] = SimpleFrozenList(), - enable: Union[str, Iterable[str]] = SimpleFrozenList(), - exclude: Union[str, Iterable[str]] = SimpleFrozenList(), + disable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, + enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, + exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), ) -> "Language": """Load a model from an installed package. @@ -619,9 +619,9 @@ def load_model_from_init_py( init_file: Union[Path, str], *, vocab: Union["Vocab", bool] = True, - disable: Union[str, Iterable[str]] = SimpleFrozenList(), - enable: Union[str, Iterable[str]] = SimpleFrozenList(), - exclude: Union[str, Iterable[str]] = SimpleFrozenList(), + disable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, + enable: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, + exclude: Union[str, Iterable[str]] = _DEFAULT_EMPTY_PIPES, config: Union[Dict[str, Any], Config] = SimpleFrozenDict(), ) -> "Language": """Helper function to use in the `load()` method of a model package's diff --git a/website/docs/api/scorer.md b/website/docs/api/scorer.md index ca3462aa9..9ef36e6fc 100644 --- a/website/docs/api/scorer.md +++ b/website/docs/api/scorer.md @@ -229,16 +229,17 @@ The reported `{attr}_score` depends on the classification properties: > print(scores["cats_macro_auc"]) > ``` -| Name | Description | -| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | -| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ | -| `attr` | The attribute to score. ~~str~~ | -| _keyword-only_ | | -| `getter` | Defaults to `getattr`. If provided, `getter(doc, attr)` should return the cats for an individual `Doc`. ~~Callable[[Doc, str], Dict[str, float]]~~ | -| labels | The set of possible labels. Defaults to `[]`. ~~Iterable[str]~~ | -| `multi_label` | Whether the attribute allows multiple labels. Defaults to `True`. ~~bool~~ | -| `positive_label` | The positive label for a binary task with exclusive classes. Defaults to `None`. ~~Optional[str]~~ | -| **RETURNS** | A dictionary containing the scores, with inapplicable scores as `None`. ~~Dict[str, Optional[float]]~~ | +| Name | Description | +| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ | +| `attr` | The attribute to score. ~~str~~ | +| _keyword-only_ | | +| `getter` | Defaults to `getattr`. If provided, `getter(doc, attr)` should return the cats for an individual `Doc`. ~~Callable[[Doc, str], Dict[str, float]]~~ | +| labels | The set of possible labels. Defaults to `[]`. ~~Iterable[str]~~ | +| `multi_label` | Whether the attribute allows multiple labels. Defaults to `True`. When set to `False` (exclusive labels), missing gold labels are interpreted as `0.0` and the threshold is set to `0.0`. ~~bool~~ | +| `positive_label` | The positive label for a binary task with exclusive classes. Defaults to `None`. ~~Optional[str]~~ | +| `threshold` | Cutoff to consider a prediction "positive". Defaults to `0.5` for multi-label, and `0.0` (i.e. whatever's highest scoring) otherwise. ~~float~~ | +| **RETURNS** | A dictionary containing the scores, with inapplicable scores as `None`. ~~Dict[str, Optional[float]]~~ | ## Scorer.score_links {#score_links tag="staticmethod" new="3"} diff --git a/website/docs/api/textcategorizer.md b/website/docs/api/textcategorizer.md index 042b4ab76..f5f8706ec 100644 --- a/website/docs/api/textcategorizer.md +++ b/website/docs/api/textcategorizer.md @@ -63,7 +63,6 @@ architectures and their arguments and hyperparameters. > ```python > from spacy.pipeline.textcat import DEFAULT_SINGLE_TEXTCAT_MODEL > config = { -> "threshold": 0.5, > "model": DEFAULT_SINGLE_TEXTCAT_MODEL, > } > nlp.add_pipe("textcat", config=config) @@ -82,7 +81,7 @@ architectures and their arguments and hyperparameters. | Setting | Description | | ----------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ | +| `threshold` | Cutoff to consider a prediction "positive", relevant for `textcat_multilabel` when calculating accuracy scores. ~~float~~ | | `model` | A model instance that predicts scores for each category. Defaults to [TextCatEnsemble](/api/architectures#TextCatEnsemble). ~~Model[List[Doc], List[Floats2d]]~~ | | `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ | @@ -123,7 +122,7 @@ shortcut for this and instantiate the component using its string name and | `model` | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. ~~Model[List[Doc], List[Floats2d]]~~ | | `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ | | _keyword-only_ | | -| `threshold` | Cutoff to consider a prediction "positive", relevant when printing accuracy results. ~~float~~ | +| `threshold` | Cutoff to consider a prediction "positive", relevant for `textcat_multilabel` when calculating accuracy scores. ~~float~~ | | `scorer` | The scoring method. Defaults to [`Scorer.score_cats`](/api/scorer#score_cats) for the attribute `"cats"`. ~~Optional[Callable]~~ | ## TextCategorizer.\_\_call\_\_ {#call tag="method"} diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md index f096890cb..64bbf8e7b 100644 --- a/website/docs/usage/rule-based-matching.md +++ b/website/docs/usage/rule-based-matching.md @@ -1792,7 +1792,7 @@ the entity `Span` – for example `._.orgs` or `._.prev_orgs` and > [`Doc.retokenize`](/api/doc#retokenize) context manager: > > ```python -> with doc.retokenize() as retokenize: +> with doc.retokenize() as retokenizer: > for ent in doc.ents: > retokenizer.merge(ent) > ```