* Read JSON files recursively from a directory instead of requiring a single .json file

2025-11-08 03:47:39 +03:00 · 2015-05-29 03:52:55 +02:00 · 2015-05-29 03:52:55 +02:00 · b76bbbd12c
commit b76bbbd12c
parent 8f31d3b864
2 changed files with 29 additions and 23 deletions
--- a/bin/parser/train.py
+++ b/bin/parser/train.py
@@ -138,8 +138,8 @@ def write_parses(Language, dev_loc, model_dir, out_loc):
@plac.annotations(
-    train_loc=("Location of training json file"),
+    train_loc=("Location of training file or directory"),
-    dev_loc=("Location of development json file"),
+    dev_loc=("Location of development file or directory"),
    corruption_level=("Amount of noise to add to training data", "option", "c", float),
    model_dir=("Location of output model directory",),
    out_loc=("Out location", "option", "o", str),
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -4,6 +4,8 @@ import json
 import ijson
 import random
 import re
 import os
 from os import path
 from spacy.munge.read_ner import tags_to_entities
 from libc.string cimport memset
@@ -94,6 +96,10 @@ def _min_edit_path(cand_words, gold_words):
 def read_json_file(loc):
    if path.isdir(loc):
        for filename in os.listdir(loc):
            yield from read_json_file(path.join(loc, filename))
    else:
        with open(loc) as file_:
            for doc in ijson.items(file_, 'item'):
                paragraphs = []