From cdb40c9af9a361b6595c3e2c1fcf1f41cefcf827 Mon Sep 17 00:00:00 2001 From: kadarakos Date: Wed, 23 Nov 2022 09:58:58 +0100 Subject: [PATCH 1/3] Update spacy/cli/apply.py Co-authored-by: Sofie Van Landeghem --- spacy/cli/apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/apply.py b/spacy/cli/apply.py index c789c46a2..918b47252 100644 --- a/spacy/cli/apply.py +++ b/spacy/cli/apply.py @@ -86,7 +86,7 @@ def apply_cli( Apply a trained pipeline to documents to get predictions. Expects a loadable spaCy pipeline and path to the data, which can be a directory or a file. - The data files can be provided multiple formats: + The data files can be provided in multiple formats: 1. .spacy files 2. .jsonl files with a specified "field" to read the text from. 3. Files with any other extension are assumed to be containing From 506508ac1c430f181ce5921f637e7b76f9881ff3 Mon Sep 17 00:00:00 2001 From: kadarakos Date: Wed, 23 Nov 2022 10:00:14 +0100 Subject: [PATCH 2/3] Update spacy/cli/apply.py Co-authored-by: Sofie Van Landeghem --- spacy/cli/apply.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/apply.py b/spacy/cli/apply.py index 918b47252..442b53c32 100644 --- a/spacy/cli/apply.py +++ b/spacy/cli/apply.py @@ -44,7 +44,7 @@ def _stream_docbin(path: Path, vocab: Vocab) -> Iterable[Doc]: yield doc -def _stream_jsonl(path: Path, field) -> Iterable[str]: +def _stream_jsonl(path: Path, field: str) -> Iterable[str]: """ Stream "text" field from JSONL. If the field "text" is not found it raises error. From 7fdf30b313f0dd0ed8523e768f53e97f60029097 Mon Sep 17 00:00:00 2001 From: kadarakos Date: Wed, 23 Nov 2022 10:50:13 +0100 Subject: [PATCH 3/3] Update spacy/cli/apply.py Co-authored-by: Sofie Van Landeghem --- spacy/cli/apply.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/spacy/cli/apply.py b/spacy/cli/apply.py index 442b53c32..6f8065576 100644 --- a/spacy/cli/apply.py +++ b/spacy/cli/apply.py @@ -14,16 +14,16 @@ from ..vocab import Vocab from ..util import ensure_path, load_model -path_help = ("Location of the documents to predict on." - "Can be a single file in .spacy format or " - ".jsonl file and files with other extensions" - "are treated as single plain text documents." +path_help = ("Location of the documents to predict on. " + "Can be a single file in .spacy format or a " + ".jsonl file. Files with other extensions " + "are treated as single plain text documents. " "If a directory is provided " "it is traversed recursively to grab all files to " - "be processed. The files can be a mixture of .spacy" + "be processed. The files can be a mixture of .spacy, " ".jsonl and text files. If .jsonl is provided the " - "specified field is going to be grabbed ('text') " - "by default.") + "specified field is going to be grabbed ('text' " + "by default).") out_help = "Path where to save the result .spacy file" code_help = ("Path to Python file with additional " "code (registered functions) to be imported")