Add cli for finding locations of registered func (#12757)

* Add cli for finding locations of registered func

* fixes: naming and typing

* isort

* update naming

* remove to find-function

* remove file:// bit

* use registry name if given and exit gracefully if a registry was not found

* clean up failure msg

* specify registry_name options

* mypy fixes

* return location for internal usage

* add documentation

* more mypy fixes

* clean up example

* add section to menu

* add tests

---------

Co-authored-by: svlandeg <svlandeg@github.com>
This commit is contained in:
Victoria 2023-07-31 09:39:00 +02:00 committed by GitHub
parent 51b9655470
commit 49055ed7c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 133 additions and 7 deletions

View File

@ -14,6 +14,7 @@ from .debug_diff import debug_diff # noqa: F401
from .debug_model import debug_model # noqa: F401 from .debug_model import debug_model # noqa: F401
from .download import download # noqa: F401 from .download import download # noqa: F401
from .evaluate import evaluate # noqa: F401 from .evaluate import evaluate # noqa: F401
from .find_function import find_function # noqa: F401
from .find_threshold import find_threshold # noqa: F401 from .find_threshold import find_threshold # noqa: F401
from .info import info # noqa: F401 from .info import info # noqa: F401
from .init_config import fill_config, init_config # noqa: F401 from .init_config import fill_config, init_config # noqa: F401

View File

@ -0,0 +1,69 @@
from typing import Optional, Tuple
from catalogue import RegistryError
from wasabi import msg
from ..util import registry
from ._util import Arg, Opt, app
@app.command("find-function")
def find_function_cli(
# fmt: off
func_name: str = Arg(..., help="Name of the registered function."),
registry_name: Optional[str] = Opt(None, "--registry", "-r", help="Name of the catalogue registry."),
# fmt: on
):
"""
Find the module, path and line number to the file the registered
function is defined in, if available.
func_name (str): Name of the registered function.
registry_name (Optional[str]): Name of the catalogue registry.
DOCS: https://spacy.io/api/cli#find-function
"""
if not registry_name:
registry_names = registry.get_registry_names()
for name in registry_names:
if registry.has(name, func_name):
registry_name = name
break
if not registry_name:
msg.fail(
f"Couldn't find registered function: '{func_name}'",
exits=1,
)
assert registry_name is not None
find_function(func_name, registry_name)
def find_function(func_name: str, registry_name: str) -> Tuple[str, int]:
registry_desc = None
try:
registry_desc = registry.find(registry_name, func_name)
except RegistryError as e:
msg.fail(
f"Couldn't find registered function: '{func_name}' in registry '{registry_name}'",
)
msg.fail(f"{e}", exits=1)
assert registry_desc is not None
registry_path = None
line_no = None
if registry_desc["file"]:
registry_path = registry_desc["file"]
line_no = registry_desc["line_no"]
if not registry_path or not line_no:
msg.fail(
f"Couldn't find path to registered function: '{func_name}' in registry '{registry_name}'",
exits=1,
)
assert registry_path is not None
assert line_no is not None
msg.good(f"Found registered function '{func_name}' at {registry_path}:{line_no}")
return str(registry_path), int(line_no)

View File

@ -233,3 +233,37 @@ def test_project_push_pull(project_dir):
result = CliRunner().invoke(app, ["project", "pull", remote, str(project_dir)]) result = CliRunner().invoke(app, ["project", "pull", remote, str(project_dir)])
assert result.exit_code == 0 assert result.exit_code == 0
assert test_file.is_file() assert test_file.is_file()
def test_find_function_valid():
# example of architecture in main code base
function = "spacy.TextCatBOW.v2"
result = CliRunner().invoke(app, ["find-function", function, "-r", "architectures"])
assert f"Found registered function '{function}'" in result.stdout
assert "textcat.py" in result.stdout
result = CliRunner().invoke(app, ["find-function", function])
assert f"Found registered function '{function}'" in result.stdout
assert "textcat.py" in result.stdout
# example of architecture in spacy-legacy
function = "spacy.TextCatBOW.v1"
result = CliRunner().invoke(app, ["find-function", function])
assert f"Found registered function '{function}'" in result.stdout
assert "spacy_legacy" in result.stdout
assert "textcat.py" in result.stdout
def test_find_function_invalid():
# invalid registry
function = "spacy.TextCatBOW.v2"
registry = "foobar"
result = CliRunner().invoke(
app, ["find-function", function, "--registry", registry]
)
assert f"Unknown function registry: '{registry}'" in result.stdout
# invalid function
function = "spacy.TextCatBOW.v666"
result = CliRunner().invoke(app, ["find-function", function])
assert f"Couldn't find registered function: '{function}'" in result.stdout

View File

@ -7,6 +7,7 @@ menu:
- ['info', 'info'] - ['info', 'info']
- ['validate', 'validate'] - ['validate', 'validate']
- ['init', 'init'] - ['init', 'init']
- ['find-function', 'find-function']
- ['convert', 'convert'] - ['convert', 'convert']
- ['debug', 'debug'] - ['debug', 'debug']
- ['train', 'train'] - ['train', 'train']
@ -251,6 +252,27 @@ $ python -m spacy init labels [config_path] [output_path] [--code] [--verbose] [
| overrides | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. ~~Any (option/flag)~~ | | overrides | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. ~~Any (option/flag)~~ |
| **CREATES** | The label files. | | **CREATES** | The label files. |
## find-function {id="find-function",version="3.7",tag="command"}
Find the module, path and line number to the file for a given registered
function. This functionality is helpful to understand where registered
functions, as used in the config file, are defined.
```bash
$ python -m spacy find-function [func_name] [--registry]
```
> #### Example
>
> ```bash
> $ python -m spacy find-function spacy.TextCatBOW.v1
> ```
| Name | Description |
| ------------------ | ----------------------------------------------------- |
| `func_name` | Name of the registered function. ~~str (positional)~~ |
| `--registry`, `-r` | Name of the catalogue registry. ~~str (option)~~ |
## convert {id="convert",tag="command"} ## convert {id="convert",tag="command"}
Convert files into spaCy's Convert files into spaCy's
@ -1651,10 +1673,10 @@ $ python -m spacy huggingface-hub push [whl_path] [--org] [--msg] [--verbose]
> $ python -m spacy huggingface-hub push en_ner_fashion-0.0.0-py3-none-any.whl > $ python -m spacy huggingface-hub push en_ner_fashion-0.0.0-py3-none-any.whl
> ``` > ```
| Name | Description | | Name | Description |
| -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | | ----------------- | ------------------------------------------------------------------------------------------------------------------- |
| `whl_path` | The path to the `.whl` file packaged with [`spacy package`](https://spacy.io/api/cli#package). ~~Path(positional)~~ | | `whl_path` | The path to the `.whl` file packaged with [`spacy package`](https://spacy.io/api/cli#package). ~~Path(positional)~~ |
| `--org`, `-o` | Optional name of organization to which the pipeline should be uploaded. ~~str (option)~~ | | `--org`, `-o` | Optional name of organization to which the pipeline should be uploaded. ~~str (option)~~ |
| `--msg`, `-m` | Commit message to use for update. Defaults to `"Update spaCy pipeline"`. ~~str (option)~~ | | `--msg`, `-m` | Commit message to use for update. Defaults to `"Update spaCy pipeline"`. ~~str (option)~~ |
| `--verbose`, `-V` | Output additional info for debugging, e.g. the full generated hub metadata. ~~bool (flag)~~ | | `--verbose`, `-V` | Output additional info for debugging, e.g. the full generated hub metadata. ~~bool (flag)~~ |
| **UPLOADS** | The pipeline to the hub. | | **UPLOADS** | The pipeline to the hub. |