mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-06 21:30:22 +03:00
addressing reviews
This commit is contained in:
parent
1efef3f216
commit
e75722cd15
|
@ -14,18 +14,15 @@ from ..vocab import Vocab
|
||||||
from ..util import ensure_path, load_model
|
from ..util import ensure_path, load_model
|
||||||
|
|
||||||
|
|
||||||
path_help = (
|
path_help = """Location of the documents to predict on.
|
||||||
"Location of the documents to predict on. "
|
Can be a single file in .spacy format or a .jsonl file.
|
||||||
"Can be a single file in .spacy format or a "
|
Files with other extensions are treated as single plain text documents.
|
||||||
".jsonl file. Files with other extensions "
|
If a directory is provided it is traversed recursively to grab
|
||||||
"are treated as single plain text documents. "
|
all files to be processed.
|
||||||
"If a directory is provided "
|
The files can be a mixture of .spacy, .jsonl and text files.
|
||||||
"it is traversed recursively to grab all files to "
|
If .jsonl is provided the specified field is going
|
||||||
"be processed. The files can be a mixture of .spacy, "
|
to be grabbed ("text" by default)."""
|
||||||
".jsonl and text files. If .jsonl is provided the "
|
|
||||||
"specified field is going to be grabbed ('text' "
|
|
||||||
"by default)."
|
|
||||||
)
|
|
||||||
out_help = "Path to save the resulting .spacy file"
|
out_help = "Path to save the resulting .spacy file"
|
||||||
code_help = (
|
code_help = (
|
||||||
"Path to Python file with additional " "code (registered functions) to be imported"
|
"Path to Python file with additional " "code (registered functions) to be imported"
|
||||||
|
@ -56,7 +53,7 @@ def _stream_jsonl(path: Path, field: str) -> Iterable[str]:
|
||||||
"""
|
"""
|
||||||
for entry in srsly.read_jsonl(path):
|
for entry in srsly.read_jsonl(path):
|
||||||
if field not in entry:
|
if field not in entry:
|
||||||
raise msg.fail(
|
msg.fail(
|
||||||
f"{path} does not contain the required '{field}' field.", exits=1
|
f"{path} does not contain the required '{field}' field.", exits=1
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
@ -117,11 +114,14 @@ def apply(
|
||||||
batch_size: int,
|
batch_size: int,
|
||||||
n_process: int,
|
n_process: int,
|
||||||
):
|
):
|
||||||
|
docbin = DocBin(store_user_data=True)
|
||||||
|
paths = walk_directory(data_path)
|
||||||
|
if len(paths) == 0:
|
||||||
|
msg.fail("Did not find data to process,"
|
||||||
|
f" {data_path} seems to be an empty directory.", exits=1)
|
||||||
nlp = load_model(model)
|
nlp = load_model(model)
|
||||||
msg.good(f"Loaded model {model}")
|
msg.good(f"Loaded model {model}")
|
||||||
vocab = nlp.vocab
|
vocab = nlp.vocab
|
||||||
docbin = DocBin(store_user_data=True)
|
|
||||||
paths = walk_directory(data_path)
|
|
||||||
streams: List[DocOrStrStream] = []
|
streams: List[DocOrStrStream] = []
|
||||||
text_files = []
|
text_files = []
|
||||||
for path in paths:
|
for path in paths:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user