From e794d4ae39b65aed341fa588ed4d473644aec672 Mon Sep 17 00:00:00 2001
From: Peter Baumgartner <5107405+pmbaumgartner@users.noreply.github.com>
Date: Wed, 28 Sep 2022 11:16:05 -0400
Subject: [PATCH 01/16] `debug data` Spancat Table Improvements (#11504)

* update

* fix format function

* pull out _format_number

* format with black
---
 spacy/cli/_util.py      |  9 +++++++++
 spacy/cli/debug_data.py | 29 ++++++++++++++++++++++++-----
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index ae43b991b..897964a88 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -573,3 +573,12 @@ def setup_gpu(use_gpu: int, silent=None) -> None:
         local_msg.info("Using CPU")
         if gpu_is_available():
             local_msg.info("To switch to GPU 0, use the option: --gpu-id 0")
+
+
+def _format_number(number: Union[int, float], ndigits: int = 2) -> str:
+    """Formats a number (float or int) rounding to `ndigits`, without truncating trailing 0s,
+    as happens with `round(number, ndigits)`"""
+    if isinstance(number, float):
+        return f"{number:.{ndigits}f}"
+    else:
+        return str(number)
diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index bd05471b1..963d5b926 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -9,7 +9,7 @@ import typer
 import math
 
 from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
-from ._util import import_code, debug_cli
+from ._util import import_code, debug_cli, _format_number
 from ..training import Example, remove_bilu_prefix
 from ..training.initialize import get_sourced_components
 from ..schemas import ConfigSchemaTraining
@@ -989,7 +989,8 @@ def _get_kl_divergence(p: Counter, q: Counter) -> float:
 def _format_span_row(span_data: List[Dict], labels: List[str]) -> List[Any]:
     """Compile into one list for easier reporting"""
     d = {
-        label: [label] + list(round(d[label], 2) for d in span_data) for label in labels
+        label: [label] + list(_format_number(d[label]) for d in span_data)
+        for label in labels
     }
     return list(d.values())
 
@@ -1004,6 +1005,10 @@ def _get_span_characteristics(
         label: _gmean(l)
         for label, l in compiled_gold["spans_length"][spans_key].items()
     }
+    spans_per_type = {
+        label: len(spans)
+        for label, spans in compiled_gold["spans_per_type"][spans_key].items()
+    }
     min_lengths = [min(l) for l in compiled_gold["spans_length"][spans_key].values()]
     max_lengths = [max(l) for l in compiled_gold["spans_length"][spans_key].values()]
 
@@ -1031,6 +1036,7 @@ def _get_span_characteristics(
     return {
         "sd": span_distinctiveness,
         "bd": sb_distinctiveness,
+        "spans_per_type": spans_per_type,
         "lengths": span_length,
         "min_length": min(min_lengths),
         "max_length": max(max_lengths),
@@ -1045,12 +1051,15 @@ def _get_span_characteristics(
 
 def _print_span_characteristics(span_characteristics: Dict[str, Any]):
     """Print all span characteristics into a table"""
-    headers = ("Span Type", "Length", "SD", "BD")
+    headers = ("Span Type", "Length", "SD", "BD", "N")
+    # Wasabi has this at 30 by default, but we might have some long labels
+    max_col = max(30, max(len(label) for label in span_characteristics["labels"]))
     # Prepare table data with all span characteristics
     table_data = [
         span_characteristics["lengths"],
         span_characteristics["sd"],
         span_characteristics["bd"],
+        span_characteristics["spans_per_type"],
     ]
     table = _format_span_row(
         span_data=table_data, labels=span_characteristics["labels"]
@@ -1061,8 +1070,18 @@ def _print_span_characteristics(span_characteristics: Dict[str, Any]):
         span_characteristics["avg_sd"],
         span_characteristics["avg_bd"],
     ]
-    footer = ["Wgt. Average"] + [str(round(f, 2)) for f in footer_data]
-    msg.table(table, footer=footer, header=headers, divider=True)
+
+    footer = (
+        ["Wgt. Average"] + ["{:.2f}".format(round(f, 2)) for f in footer_data] + ["-"]
+    )
+    msg.table(
+        table,
+        footer=footer,
+        header=headers,
+        divider=True,
+        aligns=["l"] + ["r"] * (len(footer_data) + 1),
+        max_col=max_col,
+    )
 
 
 def _get_spans_length_freq_dist(

From 6d7630c5d372cda53b88a18b10bb893ce478d294 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Thu, 29 Sep 2022 10:44:06 +0200
Subject: [PATCH 02/16] Allow overriding spacy_version in spacy package meta
 (#11552)

---
 spacy/cli/package.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index b8c8397b6..324c5d1bb 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -299,8 +299,8 @@ def get_meta(
     }
     nlp = util.load_model_from_path(Path(model_path))
     meta.update(nlp.meta)
-    meta.update(existing_meta)
     meta["spacy_version"] = util.get_minor_version_range(about.__version__)
+    meta.update(existing_meta)
     meta["vectors"] = {
         "width": nlp.vocab.vectors_length,
         "vectors": len(nlp.vocab.vectors),

From ba63f57f81441d049da52c5d398e5b226019a1a6 Mon Sep 17 00:00:00 2001
From: Paul O'Leary McCann <polm@dampfkraft.com>
Date: Thu, 29 Sep 2022 18:50:29 +0900
Subject: [PATCH 03/16] Update docs to reflect Doc input to Language (#11555)

---
 website/docs/api/language.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/website/docs/api/language.md b/website/docs/api/language.md
index ed763e36a..767a7450a 100644
--- a/website/docs/api/language.md
+++ b/website/docs/api/language.md
@@ -164,6 +164,9 @@ examples, see the
 Apply the pipeline to some text. The text can span multiple sentences, and can
 contain arbitrary whitespace. Alignment into the original string is preserved.
 
+Instead of text, a `Doc` can be passed as input, in which case tokenization is
+skipped, but the rest of the pipeline is run.
+
 > #### Example
 >
 > ```python
@@ -173,7 +176,7 @@ contain arbitrary whitespace. Alignment into the original string is preserved.
 
 | Name            | Description                                                                                                                                    |
 | --------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
-| `text`          | The text to be processed. ~~str~~                                                                                                              |
+| `text`          | The text to be processed, or a Doc. ~~Union[str, Doc]~~                                                                                        |
 | _keyword-only_  |                                                                                                                                                |
 | `disable`       | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). ~~List[str]~~                                                |
 | `component_cfg` | Optional dictionary of keyword arguments for components, keyed by component names. Defaults to `None`. ~~Optional[Dict[str, Dict[str, Any]]]~~ |
@@ -184,6 +187,9 @@ contain arbitrary whitespace. Alignment into the original string is preserved.
 Process texts as a stream, and yield `Doc` objects in order. This is usually
 more efficient than processing texts one-by-one.
 
+Instead of text, a `Doc` object can be passed as input. In this case
+tokenization is skipped but the rest of the pipeline is run.
+
 > #### Example
 >
 > ```python
@@ -194,7 +200,7 @@ more efficient than processing texts one-by-one.
 
 | Name                                       | Description                                                                                                                                                         |
 | ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `texts`                                    | A sequence of strings. ~~Iterable[str]~~                                                                                                                            |
+| `texts`                                    | A sequence of strings (or `Doc` objects). ~~Iterable[Union[str, Doc]]~~                                                                                             |
 | _keyword-only_                             |                                                                                                                                                                     |
 | `as_tuples`                                | If set to `True`, inputs should be a sequence of `(text, context)` tuples. Output will then be a sequence of `(doc, context)` tuples. Defaults to `False`. ~~bool~~ |
 | `batch_size`                               | The number of texts to buffer. ~~Optional[int]~~                                                                                                                    |

From bcda8bc1e720e999243d23ce620181fcad7e8e46 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Thu, 29 Sep 2022 14:24:40 +0200
Subject: [PATCH 04/16] update mypy to latest version (#11546)

* update mypy and disable it for python 3.6

* ignoring mypy's type redefinition error
---
 .github/azure-steps.yml       | 2 +-
 requirements.txt              | 2 +-
 spacy/pipeline/entityruler.py | 5 ++---
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml
index c7722391f..9d57219ca 100644
--- a/.github/azure-steps.yml
+++ b/.github/azure-steps.yml
@@ -27,7 +27,7 @@ steps:
 
   - script: python -m mypy spacy
     displayName: 'Run mypy'
-    condition: ne(variables['python_version'], '3.10')
+    condition: ne(variables['python_version'], '3.6')
 
   - task: DeleteFiles@1
     inputs:
diff --git a/requirements.txt b/requirements.txt
index e45fde787..446560c06 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -30,7 +30,7 @@ pytest-timeout>=1.3.0,<2.0.0
 mock>=2.0.0,<3.0.0
 flake8>=3.8.0,<3.10.0
 hypothesis>=3.27.0,<7.0.0
-mypy>=0.910,<0.970; platform_machine!='aarch64'
+mypy>=0.980,<0.990; platform_machine != "aarch64" and python_version >= "3.7"
 types-dataclasses>=0.1.3; python_version < "3.7"
 types-mock>=0.1.1
 types-setuptools>=57.0.0
diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py
index 3cb1ca676..8154a077d 100644
--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@@ -1,6 +1,5 @@
-import warnings
 from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable, Sequence
-from typing import cast
+import warnings
 from collections import defaultdict
 from pathlib import Path
 import srsly
@@ -317,7 +316,7 @@ class EntityRuler(Pipe):
                     phrase_pattern["id"] = ent_id
                 phrase_patterns.append(phrase_pattern)
             for entry in token_patterns + phrase_patterns:  # type: ignore[operator]
-                label = entry["label"]
+                label = entry["label"]  # type: ignore
                 if "id" in entry:
                     ent_label = label
                     label = self._create_label(label, entry["id"])

From ff9002b726cfdae083a9a0206e1ef615f19a6088 Mon Sep 17 00:00:00 2001
From: Gabriele Picco <piccogabriele@gmail.com>
Date: Thu, 29 Sep 2022 16:34:44 +0100
Subject: [PATCH 05/16] Add Zshot Spacy plugin (#11557)

* Add Zshot Spacy plugin

Add Zshot (Zero and Few shot named entity & relationships recognition) Spacy plugin

* Update website/meta/universe.json

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update website/meta/universe.json

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
---
 website/meta/universe.json | 57 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/website/meta/universe.json b/website/meta/universe.json
index 9ec0d6c0e..a6a1a0fc7 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -1,5 +1,62 @@
 {
     "resources": [
+        {
+            "id": "Zshot",
+            "title": "Zshot",
+            "slogan": "Zero and Few shot named entity & relationships recognition",
+            "github": "ibm/zshot",
+            "pip": "zshot",
+            "code_example": [
+                "import spacy",
+                "from zshot import PipelineConfig, displacy",
+                "from zshot.linker import LinkerRegen",
+                "from zshot.mentions_extractor import MentionsExtractorSpacy",
+                "from zshot.utils.data_models import Entity",
+                "",
+                "nlp = spacy.load('en_core_web_sm')",
+                "# zero shot definition of entities",
+                "nlp_config = PipelineConfig(",
+                "    mentions_extractor=MentionsExtractorSpacy(),",
+                "    linker=LinkerRegen(),",
+                "    entities=[",
+                "        Entity(name='Paris',",
+                "               description='Paris is located in northern central France, in a north-bending arc of the river Seine'),",
+                "        Entity(name='IBM',",
+                "               description='International Business Machines Corporation (IBM) is an American multinational technology corporation headquartered in Armonk, New York'),",
+                "        Entity(name='New York', description='New York is a city in U.S. state'),",
+                "        Entity(name='Florida', description='southeasternmost U.S. state'),",
+                "        Entity(name='American',",
+                "              description='American, something of, from, or related to the United States of America, commonly known as the United States or America'),",
+                "        Entity(name='Chemical formula',",
+                "               description='In chemistry, a chemical formula is a way of presenting information about the chemical proportions of atoms that constitute a particular chemical compound or molecul'),",
+                "        Entity(name='Acetamide',",
+                "               description='Acetamide (systematic name: ethanamide) is an organic compound with the formula CH3CONH2. It is the simplest amide derived from acetic acid. It finds some use as a plasticizer and as an industrial solvent.'),",
+                "        Entity(name='Armonk',",
+                "               description='Armonk is a hamlet and census-designated place (CDP) in the town of North Castle, located in Westchester County, New York, United States.'),",
+                "        Entity(name='Acetic Acid',",
+                "               description='Acetic acid, systematically named ethanoic acid, is an acidic, colourless liquid and organic compound with the chemical formula CH3COOH'),",
+                "        Entity(name='Industrial solvent',",
+                "               description='Acetamide (systematic name: ethanamide) is an organic compound with the formula CH3CONH2. It is the simplest amide derived from acetic acid. It finds some use as a plasticizer and as an industrial solvent.'),",
+                "    ]",
+                ")",
+                "nlp.add_pipe('zshot', config=nlp_config, last=True)",
+                "",
+                "text = 'International Business Machines Corporation (IBM) is an American multinational technology corporation' \\",
+                "        ' headquartered in Armonk, New York, with operations in over 171 countries.'",
+                "",
+                "doc = nlp(text)",
+                "displacy.serve(doc, style='ent')"
+            ],
+            "thumb": "https://ibm.github.io/zshot/img/graph.png",
+            "url": "https://ibm.github.io/zshot/",
+            "author": "IBM Research",
+            "author_links": {
+                "github": "ibm",
+                "twitter": "IBMResearch",
+                "website": "https://research.ibm.com/labs/ireland/"
+            },
+            "category": ["scientific", "models", "research"]
+        },
         {
             "id": "concepcy",
             "title": "concepCy",

From 087cc74c6abdd43e04e4313cdcf292edf6187f4b Mon Sep 17 00:00:00 2001
From: Paul O'Leary McCann <polm@dampfkraft.com>
Date: Mon, 3 Oct 2022 18:53:21 +0900
Subject: [PATCH 06/16] Remove mention of 1.7 from issue template (#11570)

It's rare to have anyone using v1 anymore, so this message is no longer
helpful.
---
 .github/ISSUE_TEMPLATE/01_bugs.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/ISSUE_TEMPLATE/01_bugs.md b/.github/ISSUE_TEMPLATE/01_bugs.md
index 255a5241e..f0d0ba912 100644
--- a/.github/ISSUE_TEMPLATE/01_bugs.md
+++ b/.github/ISSUE_TEMPLATE/01_bugs.md
@@ -10,7 +10,7 @@ about: Use this template if you came across a bug or unexpected behaviour differ
 <!-- Include a code example or the steps that led to the problem. Please try to be as specific as possible. -->
 
 ## Your Environment
-<!-- Include details of your environment. If you're using spaCy 1.7+, you can also type `python -m spacy info --markdown` and copy-paste the result here.-->
+<!-- Include details of your environment. You can also type `python -m spacy info --markdown` and copy-paste the result here.-->
 * Operating System:
 * Python Version Used:
 * spaCy Version Used:

From 70e21dfcad28b044903ba33b2b8831d925151b76 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Mon, 3 Oct 2022 13:04:03 +0200
Subject: [PATCH 07/16] PR to test importlib-metadata (#11569)

* empty commit

* restrict importlib-metadata to lower than 5.0.0

* restrict importlib-metadata also for validate CI step

* set fixed version for CI

* try flake8 5.0.4 in CI validation step

* remove importlib-metadata from requirements again
---
 azure-pipelines.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index f475b7fdd..2f5201614 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -31,7 +31,7 @@ jobs:
         inputs:
           versionSpec: "3.7"
       - script: |
-          pip install flake8==3.9.2
+          pip install flake8==5.0.4
           python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
         displayName: "flake8"
 

From 8cd77dd54cfc89c2f67ca2412490ef9b49a98518 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Tue, 4 Oct 2022 11:23:04 +0200
Subject: [PATCH 08/16] Sync flake8 version across requirements (#11580)

---
 .pre-commit-config.yaml | 2 +-
 requirements.txt        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b959262e3..df59697b1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,7 +6,7 @@ repos:
       language_version: python3.7
       additional_dependencies: ['click==8.0.4']
 -   repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.2
+    rev: 5.0.4
     hooks:
     - id: flake8
       args:
diff --git a/requirements.txt b/requirements.txt
index 446560c06..14847ff21 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -28,7 +28,7 @@ cython>=0.25,<3.0
 pytest>=5.2.0,!=7.1.0
 pytest-timeout>=1.3.0,<2.0.0
 mock>=2.0.0,<3.0.0
-flake8>=3.8.0,<3.10.0
+flake8>=3.8.0,<6.0.0
 hypothesis>=3.27.0,<7.0.0
 mypy>=0.980,<0.990; platform_machine != "aarch64" and python_version >= "3.7"
 types-dataclasses>=0.1.3; python_version < "3.7"

From ef74f8f5e447dec10ab69d2a7e94f0e09165db75 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Tue, 11 Oct 2022 14:15:22 +0200
Subject: [PATCH 09/16] Fix mypy error in edittree lemmatizer (#11612)

* cleanup imports

* try limiting Thinc to previous release

* remove Model specification

* fix code and revert Thinc constraint
---
 spacy/pipeline/edit_tree_lemmatizer.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py
index b7d615f6d..7f6367c75 100644
--- a/spacy/pipeline/edit_tree_lemmatizer.py
+++ b/spacy/pipeline/edit_tree_lemmatizer.py
@@ -1,7 +1,6 @@
 from typing import cast, Any, Callable, Dict, Iterable, List, Optional
-from typing import Sequence, Tuple, Union
+from typing import Tuple
 from collections import Counter
-from copy import deepcopy
 from itertools import islice
 import numpy as np
 
@@ -150,7 +149,7 @@ class EditTreeLemmatizer(TrainablePipe):
             # Handle cases where there are no tokens in any docs.
             n_labels = len(self.cfg["labels"])
             guesses: List[Ints2d] = [
-                self.model.ops.alloc((0, n_labels), dtype="i") for doc in docs
+                self.model.ops.alloc2i(0, n_labels, dtype="i") for _ in docs
             ]
             assert len(guesses) == n_docs
             return guesses

From 29649589fc889a58c8b631d569d4ae378a10aa2b Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Tue, 11 Oct 2022 15:25:05 +0200
Subject: [PATCH 10/16] remove dtype (#11615)

---
 spacy/pipeline/edit_tree_lemmatizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py
index 7f6367c75..76b0e0bc9 100644
--- a/spacy/pipeline/edit_tree_lemmatizer.py
+++ b/spacy/pipeline/edit_tree_lemmatizer.py
@@ -149,7 +149,7 @@ class EditTreeLemmatizer(TrainablePipe):
             # Handle cases where there are no tokens in any docs.
             n_labels = len(self.cfg["labels"])
             guesses: List[Ints2d] = [
-                self.model.ops.alloc2i(0, n_labels, dtype="i") for _ in docs
+                self.model.ops.alloc2i(0, n_labels) for _ in docs
             ]
             assert len(guesses) == n_docs
             return guesses

From 2e52479eec987367117d27fb4f049df2efb2518d Mon Sep 17 00:00:00 2001
From: Paul O'Leary McCann <polm@dampfkraft.com>
Date: Tue, 11 Oct 2022 23:45:05 +0900
Subject: [PATCH 11/16] Fix example code for spacy-wordnet (#11593)

* Fix example code for spacy-wordnet

It looks like in the most recent version, 0.1.0, it's no longer possible
to pass the lang parameter to the component separately. Doing so will
raise an error.

* Apply suggestions from code review

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>

* Cleanup

* More cleanup

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 website/meta/universe.json | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/website/meta/universe.json b/website/meta/universe.json
index a6a1a0fc7..637e9d6ce 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -2460,20 +2460,20 @@
                 "import spacy",
                 "from spacy_wordnet.wordnet_annotator import WordnetAnnotator ",
                 "",
-                "# Load an spacy model (supported models are \"es\" and \"en\") ",
-                "nlp = spacy.load('en')",
-                "# Spacy 3.x",
-                "nlp.add_pipe(\"spacy_wordnet\", after='tagger', config={'lang': nlp.lang})",
-                "# Spacy 2.x",
+                "# Load a spaCy model (supported languages are \"es\" and \"en\") ",
+                "nlp = spacy.load('en_core_web_sm')",
+                "# spaCy 3.x",
+                "nlp.add_pipe(\"spacy_wordnet\", after='tagger')",
+                "# spaCy 2.x",
                 "# nlp.add_pipe(WordnetAnnotator(nlp.lang), after='tagger')",
                 "token = nlp('prices')[0]",
                 "",
-                "# wordnet object link spacy token with nltk wordnet interface by giving acces to",
+                "# WordNet object links spaCy token with NLTK WordNet interface by giving access to",
                 "# synsets and lemmas ",
                 "token._.wordnet.synsets()",
                 "token._.wordnet.lemmas()",
                 "",
-                "# And automatically tags with wordnet domains",
+                "# And automatically add info about WordNet domains",
                 "token._.wordnet.wordnet_domains()"
             ],
             "author": "recognai",

From fe06e037bcd733708401bce082863994b1fc48bd Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Wed, 12 Oct 2022 12:18:39 +0200
Subject: [PATCH 12/16] Fix init for pymorphy2_lookup lemmatizer mode (#11631)

---
 spacy/lang/ru/lemmatizer.py            |  2 +-
 spacy/lang/uk/lemmatizer.py            |  2 +-
 spacy/tests/conftest.py                | 17 +++++++++++++++++
 spacy/tests/lang/ru/test_lemmatizer.py | 14 ++++++++++++++
 spacy/tests/lang/uk/test_lemmatizer.py |  8 ++++++++
 5 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/spacy/lang/ru/lemmatizer.py b/spacy/lang/ru/lemmatizer.py
index 85180b1e4..5bf685d44 100644
--- a/spacy/lang/ru/lemmatizer.py
+++ b/spacy/lang/ru/lemmatizer.py
@@ -23,7 +23,7 @@ class RussianLemmatizer(Lemmatizer):
         overwrite: bool = False,
         scorer: Optional[Callable] = lemmatizer_score,
     ) -> None:
-        if mode == "pymorphy2":
+        if mode in {"pymorphy2", "pymorphy2_lookup"}:
             try:
                 from pymorphy2 import MorphAnalyzer
             except ImportError:
diff --git a/spacy/lang/uk/lemmatizer.py b/spacy/lang/uk/lemmatizer.py
index a8bc56057..d4f8cc9e5 100644
--- a/spacy/lang/uk/lemmatizer.py
+++ b/spacy/lang/uk/lemmatizer.py
@@ -18,7 +18,7 @@ class UkrainianLemmatizer(RussianLemmatizer):
         overwrite: bool = False,
         scorer: Optional[Callable] = lemmatizer_score,
     ) -> None:
-        if mode == "pymorphy2":
+        if mode in {"pymorphy2", "pymorphy2_lookup"}:
             try:
                 from pymorphy2 import MorphAnalyzer
             except ImportError:
diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 742bfcc6a..394ef00d3 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -343,6 +343,14 @@ def ru_lemmatizer():
     return get_lang_class("ru")().add_pipe("lemmatizer")
 
 
+@pytest.fixture
+def ru_lookup_lemmatizer():
+    pytest.importorskip("pymorphy2")
+    return get_lang_class("ru")().add_pipe(
+        "lemmatizer", config={"mode": "pymorphy2_lookup"}
+    )
+
+
 @pytest.fixture(scope="session")
 def sa_tokenizer():
     return get_lang_class("sa")().tokenizer
@@ -422,6 +430,15 @@ def uk_lemmatizer():
     return get_lang_class("uk")().add_pipe("lemmatizer")
 
 
+@pytest.fixture
+def uk_lookup_lemmatizer():
+    pytest.importorskip("pymorphy2")
+    pytest.importorskip("pymorphy2_dicts_uk")
+    return get_lang_class("uk")().add_pipe(
+        "lemmatizer", config={"mode": "pymorphy2_lookup"}
+    )
+
+
 @pytest.fixture(scope="session")
 def ur_tokenizer():
     return get_lang_class("ur")().tokenizer
diff --git a/spacy/tests/lang/ru/test_lemmatizer.py b/spacy/tests/lang/ru/test_lemmatizer.py
index 9ca7f441b..e82fd4f8c 100644
--- a/spacy/tests/lang/ru/test_lemmatizer.py
+++ b/spacy/tests/lang/ru/test_lemmatizer.py
@@ -78,3 +78,17 @@ def test_ru_lemmatizer_punct(ru_lemmatizer):
     assert ru_lemmatizer.pymorphy2_lemmatize(doc[0]) == ['"']
     doc = Doc(ru_lemmatizer.vocab, words=["»"], pos=["PUNCT"])
     assert ru_lemmatizer.pymorphy2_lemmatize(doc[0]) == ['"']
+
+
+def test_ru_doc_lookup_lemmatization(ru_lookup_lemmatizer):
+    words = ["мама", "мыла", "раму"]
+    pos = ["NOUN", "VERB", "NOUN"]
+    morphs = [
+        "Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing",
+        "Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act",
+        "Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing",
+    ]
+    doc = Doc(ru_lookup_lemmatizer.vocab, words=words, pos=pos, morphs=morphs)
+    doc = ru_lookup_lemmatizer(doc)
+    lemmas = [token.lemma_ for token in doc]
+    assert lemmas == ["мама", "мыла", "раму"]
diff --git a/spacy/tests/lang/uk/test_lemmatizer.py b/spacy/tests/lang/uk/test_lemmatizer.py
index 57dd4198a..788744aa1 100644
--- a/spacy/tests/lang/uk/test_lemmatizer.py
+++ b/spacy/tests/lang/uk/test_lemmatizer.py
@@ -9,3 +9,11 @@ def test_uk_lemmatizer(uk_lemmatizer):
     """Check that the default uk lemmatizer runs."""
     doc = Doc(uk_lemmatizer.vocab, words=["a", "b", "c"])
     uk_lemmatizer(doc)
+    assert [token.lemma for token in doc]
+
+
+def test_uk_lookup_lemmatizer(uk_lookup_lemmatizer):
+    """Check that the lookup uk lemmatizer runs."""
+    doc = Doc(uk_lookup_lemmatizer.vocab, words=["a", "b", "c"])
+    uk_lookup_lemmatizer(doc)
+    assert [token.lemma for token in doc]

From 4d869fcc111151bcefa08ee1a2b7b49dc5ecd677 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Wed, 12 Oct 2022 15:17:40 +0200
Subject: [PATCH 13/16] Small fixes to docstrings (#11610)

* add missing scorer arg to docstring

* fix class names in textcat_multilabel

* add missing scorer to docstrings
---
 spacy/pipeline/spancat.py            | 3 +++
 spacy/pipeline/textcat_multilabel.py | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py
index 1b7a9eecb..ca9f1dab0 100644
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -133,6 +133,9 @@ def make_spancat(
     spans_key (str): Key of the doc.spans dict to save the spans under. During
         initialization and training, the component will look for spans on the
         reference document under the same key.
+    scorer (Optional[Callable]): The scoring method. Defaults to
+        Scorer.score_spans for the Doc.spans[spans_key] with overlapping
+        spans allowed.
     threshold (float): Minimum probability to consider a prediction positive.
         Spans with a positive prediction will be saved on the Doc. Defaults to
         0.5.
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index e33a885f8..119ae3310 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -96,8 +96,8 @@ def make_multilabel_textcat(
     model: Model[List[Doc], List[Floats2d]],
     threshold: float,
     scorer: Optional[Callable],
-) -> "TextCategorizer":
-    """Create a TextCategorizer component. The text categorizer predicts categories
+) -> "MultiLabel_TextCategorizer":
+    """Create a MultiLabel_TextCategorizer component. The text categorizer predicts categories
     over a whole document. It can learn one or more labels, and the labels are considered
     to be non-mutually exclusive, which means that there can be zero or more labels
     per doc).
@@ -105,6 +105,7 @@ def make_multilabel_textcat(
     model (Model[List[Doc], List[Floats2d]]): A model instance that predicts
         scores for each category.
     threshold (float): Cutoff to consider a prediction "positive".
+    scorer (Optional[Callable]): The scoring method.
     """
     return MultiLabel_TextCategorizer(
         nlp.vocab, model, name, threshold=threshold, scorer=scorer
@@ -147,6 +148,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
         name (str): The component instance name, used to add entries to the
             losses during training.
         threshold (float): Cutoff to consider a prediction "positive".
+        scorer (Optional[Callable]): The scoring method.
 
         DOCS: https://spacy.io/api/textcategorizer#init
         """

From 6b5a3e72198aa9735587b0712e3eb2c24234b463 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Fri, 14 Oct 2022 08:16:49 +0200
Subject: [PATCH 14/16] Extend to pydantic v1.10 (#11635)

* Update types in `spacy.schemas` for updated pydantic+mypy
---
 requirements.txt |  2 +-
 setup.cfg        |  2 +-
 spacy/schemas.py | 18 +++++++++---------
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 14847ff21..9d6bbb2c4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,7 +15,7 @@ pathy>=0.3.5
 numpy>=1.15.0
 requests>=2.13.0,<3.0.0
 tqdm>=4.38.0,<5.0.0
-pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
+pydantic>=1.7.4,!=1.8,!=1.8.1,<1.11.0
 jinja2
 langcodes>=3.2.0,<4.0.0
 # Official Python utilities
diff --git a/setup.cfg b/setup.cfg
index 2dc5e7042..c2653feba 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -56,7 +56,7 @@ install_requires =
     tqdm>=4.38.0,<5.0.0
     numpy>=1.15.0
     requests>=2.13.0,<3.0.0
-    pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
+    pydantic>=1.7.4,!=1.8,!=1.8.1,<1.11.0
     jinja2
     # Official Python utilities
     setuptools
diff --git a/spacy/schemas.py b/spacy/schemas.py
index 048082134..ab71b2016 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -181,12 +181,12 @@ class TokenPatternNumber(BaseModel):
     IS_SUBSET: Optional[List[StrictInt]] = Field(None, alias="is_subset")
     IS_SUPERSET: Optional[List[StrictInt]] = Field(None, alias="is_superset")
     INTERSECTS: Optional[List[StrictInt]] = Field(None, alias="intersects")
-    EQ: Union[StrictInt, StrictFloat] = Field(None, alias="==")
-    NEQ: Union[StrictInt, StrictFloat] = Field(None, alias="!=")
-    GEQ: Union[StrictInt, StrictFloat] = Field(None, alias=">=")
-    LEQ: Union[StrictInt, StrictFloat] = Field(None, alias="<=")
-    GT: Union[StrictInt, StrictFloat] = Field(None, alias=">")
-    LT: Union[StrictInt, StrictFloat] = Field(None, alias="<")
+    EQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="==")
+    NEQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="!=")
+    GEQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias=">=")
+    LEQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="<=")
+    GT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias=">")
+    LT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="<")
 
     class Config:
         extra = "forbid"
@@ -430,7 +430,7 @@ class ProjectConfigAssetURL(BaseModel):
     # fmt: off
     dest: StrictStr = Field(..., title="Destination of downloaded asset")
     url: Optional[StrictStr] = Field(None, title="URL of asset")
-    checksum: str = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
+    checksum: Optional[str] = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
     description: StrictStr = Field("", title="Description of asset")
     # fmt: on
 
@@ -438,7 +438,7 @@ class ProjectConfigAssetURL(BaseModel):
 class ProjectConfigAssetGit(BaseModel):
     # fmt: off
     git: ProjectConfigAssetGitItem = Field(..., title="Git repo information")
-    checksum: str = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
+    checksum: Optional[str] = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
     description: Optional[StrictStr] = Field(None, title="Description of asset")
     # fmt: on
 
@@ -508,7 +508,7 @@ class DocJSONSchema(BaseModel):
         None, title="Indices of sentences' start and end indices"
     )
     text: StrictStr = Field(..., title="Document text")
-    spans: Dict[StrictStr, List[Dict[StrictStr, Union[StrictStr, StrictInt]]]] = Field(
+    spans: Optional[Dict[StrictStr, List[Dict[StrictStr, Union[StrictStr, StrictInt]]]]] = Field(
         None, title="Span information - end/start indices, label, KB ID"
     )
     tokens: List[Dict[StrictStr, Union[StrictStr, StrictInt]]] = Field(

From ceb62352bfcad49b3ad63e3e65ef12dabab645b3 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 14 Oct 2022 18:04:55 +0900
Subject: [PATCH 15/16] Auto-format code with black (#11649)

Co-authored-by: explosion-bot <explosion-bot@users.noreply.github.com>
---
 spacy/pipeline/edit_tree_lemmatizer.py | 4 +---
 spacy/schemas.py                       | 6 +++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py
index 76b0e0bc9..12f9b73a3 100644
--- a/spacy/pipeline/edit_tree_lemmatizer.py
+++ b/spacy/pipeline/edit_tree_lemmatizer.py
@@ -148,9 +148,7 @@ class EditTreeLemmatizer(TrainablePipe):
         if not any(len(doc) for doc in docs):
             # Handle cases where there are no tokens in any docs.
             n_labels = len(self.cfg["labels"])
-            guesses: List[Ints2d] = [
-                self.model.ops.alloc2i(0, n_labels) for _ in docs
-            ]
+            guesses: List[Ints2d] = [self.model.ops.alloc2i(0, n_labels) for _ in docs]
             assert len(guesses) == n_docs
             return guesses
         scores = self.model.predict(docs)
diff --git a/spacy/schemas.py b/spacy/schemas.py
index ab71b2016..a67d96d9d 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -508,9 +508,9 @@ class DocJSONSchema(BaseModel):
         None, title="Indices of sentences' start and end indices"
     )
     text: StrictStr = Field(..., title="Document text")
-    spans: Optional[Dict[StrictStr, List[Dict[StrictStr, Union[StrictStr, StrictInt]]]]] = Field(
-        None, title="Span information - end/start indices, label, KB ID"
-    )
+    spans: Optional[
+        Dict[StrictStr, List[Dict[StrictStr, Union[StrictStr, StrictInt]]]]
+    ] = Field(None, title="Span information - end/start indices, label, KB ID")
     tokens: List[Dict[StrictStr, Union[StrictStr, StrictInt]]] = Field(
         ..., title="Token information - ID, start, annotations"
     )

From 858565a5671de61334443d6a2348164bc39216e1 Mon Sep 17 00:00:00 2001
From: Paul O'Leary McCann <polm@dampfkraft.com>
Date: Tue, 18 Oct 2022 15:11:39 +0900
Subject: [PATCH 16/16] Fix issues with DVC commands (#11592)

* Fix flag handling in dvc

Prior to this commit, if a flag (--verbose or --quiet) was passed to
DVC, it would be added to the end of the generated dvc command line.
This would result in the flag being interpreted as part of the actual
command to run, rather than an argument to dvc. This would result in
command lines like:

    spacy project run preprocess --verbose

That would fail with an error that there's no such directory as
`--verbose`.

This change puts the flags at the front of the dvc command so that they
are interpreted correctly. It removes the `run_dvc_commands` function,
which had been reduced to just a for loop and wasn't used elsewhere.

A separate problem is that there's no way to specify the quiet behaviour
to dvc from the command line, though it's unclear if that's a bug.

* Add dvc quiet flag to docs

* Handle case in DVC where no commands are appropriate

If you only have commands with no deps or outputs (admittedly unlikely), you
get a weird error about the dvc file not existing. This gives explicit
output instead.

* Add support for quiet flag

* Fix command execution

Commands are strings now because they're joined further up.
---
 spacy/cli/project/dvc.py | 57 +++++++++++++++++++++-------------------
 website/docs/api/cli.md  |  3 ++-
 2 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/spacy/cli/project/dvc.py b/spacy/cli/project/dvc.py
index 83dc5efbf..a15353855 100644
--- a/spacy/cli/project/dvc.py
+++ b/spacy/cli/project/dvc.py
@@ -25,6 +25,7 @@ def project_update_dvc_cli(
     project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
     workflow: Optional[str] = Arg(None, help=f"Name of workflow defined in {PROJECT_FILE}. Defaults to first workflow if not set."),
     verbose: bool = Opt(False, "--verbose", "-V", help="Print more info"),
+    quiet: bool = Opt(False, "--quiet", "-q", help="Print less info"),
     force: bool = Opt(False, "--force", "-F", help="Force update DVC config"),
     # fmt: on
 ):
@@ -36,7 +37,7 @@ def project_update_dvc_cli(
 
     DOCS: https://spacy.io/api/cli#project-dvc
     """
-    project_update_dvc(project_dir, workflow, verbose=verbose, force=force)
+    project_update_dvc(project_dir, workflow, verbose=verbose, quiet=quiet, force=force)
 
 
 def project_update_dvc(
@@ -44,6 +45,7 @@ def project_update_dvc(
     workflow: Optional[str] = None,
     *,
     verbose: bool = False,
+    quiet: bool = False,
     force: bool = False,
 ) -> None:
     """Update the auto-generated Data Version Control (DVC) config file. A DVC
@@ -54,11 +56,12 @@ def project_update_dvc(
     workflow (Optional[str]): Optional name of workflow defined in project.yml.
         If not set, the first workflow will be used.
     verbose (bool): Print more info.
+    quiet (bool): Print less info.
     force (bool): Force update DVC config.
     """
     config = load_project_config(project_dir)
     updated = update_dvc_config(
-        project_dir, config, workflow, verbose=verbose, force=force
+        project_dir, config, workflow, verbose=verbose, quiet=quiet, force=force
     )
     help_msg = "To execute the workflow with DVC, run: dvc repro"
     if updated:
@@ -72,7 +75,7 @@ def update_dvc_config(
     config: Dict[str, Any],
     workflow: Optional[str] = None,
     verbose: bool = False,
-    silent: bool = False,
+    quiet: bool = False,
     force: bool = False,
 ) -> bool:
     """Re-run the DVC commands in dry mode and update dvc.yaml file in the
@@ -83,7 +86,7 @@ def update_dvc_config(
     path (Path): The path to the project directory.
     config (Dict[str, Any]): The loaded project.yml.
     verbose (bool): Whether to print additional info (via DVC).
-    silent (bool): Don't output anything (via DVC).
+    quiet (bool): Don't output anything (via DVC).
     force (bool): Force update, even if hashes match.
     RETURNS (bool): Whether the DVC config file was updated.
     """
@@ -105,6 +108,14 @@ def update_dvc_config(
         dvc_config_path.unlink()
     dvc_commands = []
     config_commands = {cmd["name"]: cmd for cmd in config.get("commands", [])}
+
+    # some flags that apply to every command
+    flags = []
+    if verbose:
+        flags.append("--verbose")
+    if quiet:
+        flags.append("--quiet")
+
     for name in workflows[workflow]:
         command = config_commands[name]
         deps = command.get("deps", [])
@@ -118,14 +129,26 @@ def update_dvc_config(
         deps_cmd = [c for cl in [["-d", p] for p in deps] for c in cl]
         outputs_cmd = [c for cl in [["-o", p] for p in outputs] for c in cl]
         outputs_nc_cmd = [c for cl in [["-O", p] for p in outputs_no_cache] for c in cl]
-        dvc_cmd = ["run", "-n", name, "-w", str(path), "--no-exec"]
+
+        dvc_cmd = ["run", *flags, "-n", name, "-w", str(path), "--no-exec"]
         if command.get("no_skip"):
             dvc_cmd.append("--always-changed")
         full_cmd = [*dvc_cmd, *deps_cmd, *outputs_cmd, *outputs_nc_cmd, *project_cmd]
         dvc_commands.append(join_command(full_cmd))
+
+    if not dvc_commands:
+        # If we don't check for this, then there will be an error when reading the
+        # config, since DVC wouldn't create it.
+        msg.fail(
+            "No usable commands for DVC found. This can happen if none of your "
+            "commands have dependencies or outputs.",
+            exits=1,
+        )
+
     with working_dir(path):
-        dvc_flags = {"--verbose": verbose, "--quiet": silent}
-        run_dvc_commands(dvc_commands, flags=dvc_flags)
+        for c in dvc_commands:
+            dvc_command = "dvc " + c
+            run_command(dvc_command)
     with dvc_config_path.open("r+", encoding="utf8") as f:
         content = f.read()
         f.seek(0, 0)
@@ -133,26 +156,6 @@ def update_dvc_config(
     return True
 
 
-def run_dvc_commands(
-    commands: Iterable[str] = SimpleFrozenList(), flags: Dict[str, bool] = {}
-) -> None:
-    """Run a sequence of DVC commands in a subprocess, in order.
-
-    commands (List[str]): The string commands without the leading "dvc".
-    flags (Dict[str, bool]): Conditional flags to be added to command. Makes it
-        easier to pass flags like --quiet that depend on a variable or
-        command-line setting while avoiding lots of nested conditionals.
-    """
-    for c in commands:
-        command = split_command(c)
-        dvc_command = ["dvc", *command]
-        # Add the flags if they are set to True
-        for flag, is_active in flags.items():
-            if is_active:
-                dvc_command.append(flag)
-        run_command(dvc_command)
-
-
 def check_workflows(workflows: List[str], workflow: Optional[str] = None) -> None:
     """Validate workflows provided in project.yml and check that a given
     workflow can be used to generate a DVC config.
diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md
index e5cd3089b..fc2c46022 100644
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@@ -1482,7 +1482,7 @@ You'll also need to add the assets you want to track with
 </Infobox>
 
 ```cli
-$ python -m spacy project dvc [project_dir] [workflow] [--force] [--verbose]
+$ python -m spacy project dvc [project_dir] [workflow] [--force] [--verbose] [--quiet]
 ```
 
 > #### Example
@@ -1499,6 +1499,7 @@ $ python -m spacy project dvc [project_dir] [workflow] [--force] [--verbose]
 | `workflow`        | Name of workflow defined in `project.yml`. Defaults to first workflow if not set. ~~Optional[str] \(option)~~ |
 | `--force`, `-F`   | Force-updating config file. ~~bool (flag)~~                                                                   |
 | `--verbose`, `-V` | Print more output generated by DVC. ~~bool (flag)~~                                                           |
+| `--quiet`, `-q`   | Print no output generated by DVC. ~~bool (flag)~~                                                             |
 | `--help`, `-h`    | Show help message and available arguments. ~~bool (flag)~~                                                    |
 | **CREATES**       | A `dvc.yaml` file in the project directory, based on the steps defined in the given workflow.                 |