Merge branch 'predict-cli' of https://github.com/kadarakos/spaCy into predict-cli

This commit is contained in:
kadarakos 2022-11-23 11:05:46 +00:00
commit 490bce3cd8

View File

@@ -14,16 +14,16 @@ from ..vocab import Vocab
from ..util import ensure_path, load_model from ..util import ensure_path, load_model
path_help = ("Location of the documents to predict on." path_help = ("Location of the documents to predict on. "
"Can be a single file in .spacy format or " "Can be a single file in .spacy format or a "
".jsonl file and files with other extensions" ".jsonl file. Files with other extensions "
"are treated as single plain text documents." "are treated as single plain text documents. "
"If a directory is provided " "If a directory is provided "
"it is traversed recursively to grab all files to " "it is traversed recursively to grab all files to "
"be processed. The files can be a mixture of .spacy" "be processed. The files can be a mixture of .spacy, "
".jsonl and text files. If .jsonl is provided the " ".jsonl and text files. If .jsonl is provided the "
"specified field is going to be grabbed ('text') " "specified field is going to be grabbed ('text' "
"by default.") "by default).")
out_help = "Path where to save the result .spacy file" out_help = "Path where to save the result .spacy file"
code_help = ("Path to Python file with additional " code_help = ("Path to Python file with additional "
"code (registered functions) to be imported") "code (registered functions) to be imported")
@@ -44,7 +44,7 @@ def _stream_docbin(path: Path, vocab: Vocab) -> Iterable[Doc]:
yield doc yield doc
def _stream_jsonl(path: Path, field) -> Iterable[str]: def _stream_jsonl(path: Path, field: str) -> Iterable[str]:
""" """
Stream "text" field from JSONL. If the field "text" is Stream "text" field from JSONL. If the field "text" is
not found it raises error. not found it raises error.
@@ -86,7 +86,7 @@ def apply_cli(
Apply a trained pipeline to documents to get predictions. Apply a trained pipeline to documents to get predictions.
Expects a loadable spaCy pipeline and path to the data, which Expects a loadable spaCy pipeline and path to the data, which
can be a directory or a file. can be a directory or a file.
The data files can be provided multiple formats: The data files can be provided in multiple formats:
1. .spacy files 1. .spacy files
2. .jsonl files with a specified "field" to read the text from. 2. .jsonl files with a specified "field" to read the text from.
3. Files with any other extension are assumed to be containing 3. Files with any other extension are assumed to be containing