diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index 2ffbeb458..f4bddac39 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -2,24 +2,28 @@ from pathlib import Path
 from wasabi import Printer
 import srsly
 import re
+import sys
 
-from .converters import conllu2json, iob2json, conll_ner2json
-from .converters import ner_jsonl2json
+from ..tokens import DocBin
+from ..gold import docs_to_json
+from ..gold.converters import iob2docs, conll_ner2docs, json2docs
 
 
 # Converters are matched by file extension except for ner/iob, which are
 # matched by file extension and content. To add a converter, add a new
 # entry to this dict with the file extension mapped to the converter function
 # imported from /converters.
+
 CONVERTERS = {
-    "conllubio": conllu2json,
-    "conllu": conllu2json,
-    "conll": conllu2json,
-    "ner": conll_ner2json,
-    "iob": iob2json,
-    "jsonl": ner_jsonl2json,
+    # "conllubio": conllu2docs,  # TODO
+    # "conllu": conllu2docs,  # TODO
+    # "conll": conllu2docs,  # TODO
+    "ner": conll_ner2docs,
+    "iob": iob2docs,
+    "json": json2docs,
 }
 
+
 # File types
-FILE_TYPES = ("json", "jsonl", "msg")
+FILE_TYPES = ("json", "jsonl", "msg", "spacy")
 FILE_TYPES_STDOUT = ("json", "jsonl")
@@ -27,89 +31,58 @@ FILE_TYPES_STDOUT = ("json", "jsonl")
 
 
 def convert(
     # fmt: off
-    input_file: ("Input file", "positional", None, str),
-    output_dir: ("Output directory. '-' for stdout.", "positional", None, str) = "-",
-    file_type: (f"Type of data to produce: {FILE_TYPES}", "option", "t", str, FILE_TYPES) = "json",
+    input_path: ("Input file or directory", "positional", None, Path),
+    output_dir: ("Output directory.", "positional", None, Path),
+    file_type: (f"Type of data to produce: {FILE_TYPES}", "option", "t", str, FILE_TYPES) = "spacy",
     n_sents: ("Number of sentences per doc (0 to disable)", "option", "n", int) = 1,
     seg_sents: ("Segment sentences (for -c ner)", "flag", "s") = False,
     model: ("Model for sentence segmentation (for -s)", "option", "b", str) = None,
     morphology: ("Enable appending morphology to tags", "flag", "m", bool) = False,
     merge_subtokens: ("Merge CoNLL-U subtokens", "flag", "T", bool) = False,
     converter: (f"Converter: {tuple(CONVERTERS.keys())}", "option", "c", str) = "auto",
-    ner_map_path: ("NER tag mapping (as JSON-encoded dict of entity types)", "option", "N", Path) = None,
+    ner_map: ("NER tag mapping (as JSON-encoded dict of entity types)", "option", "N", Path) = None,
    lang: ("Language (if tokenizer required)", "option", "l", str) = None,
     # fmt: on
 ):
     """
-    Convert files into JSON format for use with train command and other
-    experiment management functions. If no output_dir is specified, the data
-    is written to stdout, so you can pipe them forward to a JSON file:
-    $ spacy convert some_file.conllu > some_file.json
+    Convert files into JSON or DocBin format for use with the train command
+    and other experiment management functions.
     """
+    cli_args = locals()
     no_print = output_dir == "-"
+    output_dir = Path(output_dir) if output_dir != "-" else "-"
     msg = Printer(no_print=no_print)
-    input_path = Path(input_file)
-    if file_type not in FILE_TYPES_STDOUT and output_dir == "-":
-        # TODO: support msgpack via stdout in srsly?
-        msg.fail(
-            f"Can't write .{file_type} data to stdout",
-            "Please specify an output directory.",
-            exits=1,
+    verify_cli_args(msg, **cli_args)
+    converter = _get_converter(msg, converter, input_path)
+    ner_map = srsly.read_json(ner_map) if ner_map is not None else None
+    for input_loc in walk_directory(input_path):
+        input_data = input_loc.open("r", encoding="utf-8").read()
+        # Use converter function to convert data
+        func = CONVERTERS[converter]
+        docs = func(
+            input_data,
+            n_sents=n_sents,
+            seg_sents=seg_sents,
+            append_morphology=morphology,
+            merge_subtokens=merge_subtokens,
+            lang=lang,
+            model=model,
+            no_print=no_print,
+            ner_map=ner_map,
         )
-    if not input_path.exists():
-        msg.fail("Input file not found", input_path, exits=1)
-    if output_dir != "-" and not Path(output_dir).exists():
-        msg.fail("Output directory not found", output_dir, exits=1)
-    input_data = input_path.open("r", encoding="utf-8").read()
-    if converter == "auto":
-        converter = input_path.suffix[1:]
-    if converter == "ner" or converter == "iob":
-        converter_autodetect = autodetect_ner_format(input_data)
-        if converter_autodetect == "ner":
-            msg.info("Auto-detected token-per-line NER format")
-            converter = converter_autodetect
-        elif converter_autodetect == "iob":
-            msg.info("Auto-detected sentence-per-line NER format")
-            converter = converter_autodetect
-        else:
-            msg.warn(
-                "Can't automatically detect NER format. Conversion may not succeed. See https://spacy.io/api/cli#convert"
-            )
-    if converter not in CONVERTERS:
-        msg.fail(f"Can't find converter for {converter}", exits=1)
-    ner_map = None
-    if ner_map_path is not None:
-        ner_map = srsly.read_json(ner_map_path)
-    # Use converter function to convert data
-    func = CONVERTERS[converter]
-    data = func(
-        input_data,
-        n_sents=n_sents,
-        seg_sents=seg_sents,
-        append_morphology=morphology,
-        merge_subtokens=merge_subtokens,
-        lang=lang,
-        model=model,
-        no_print=no_print,
-        ner_map=ner_map,
-    )
-    if output_dir != "-":
-        # Export data to a file
         suffix = f".{file_type}"
-        output_file = Path(output_dir) / Path(input_path.parts[-1]).with_suffix(suffix)
+        subpath = input_loc.relative_to(input_path)
+        output_file = (output_dir / subpath).with_suffix(suffix)
+        if not output_file.parent.exists():
+            output_file.parent.mkdir(parents=True)
         if file_type == "json":
-            srsly.write_json(output_file, data)
-        elif file_type == "jsonl":
-            srsly.write_jsonl(output_file, data)
-        elif file_type == "msg":
-            srsly.write_msgpack(output_file, data)
-        msg.good(f"Generated output file ({len(data)} documents): {output_file}")
-    else:
-        # Print to stdout
-        if file_type == "json":
-            srsly.write_json("-", data)
-        elif file_type == "jsonl":
-            srsly.write_jsonl("-", data)
+            data = docs_to_json(docs)
+            srsly.write_json(output_file, data)
+        else:
+            data = DocBin(docs=docs).to_bytes()
+            with output_file.open("wb") as file_:
+                file_.write(data)
+        msg.good(f"Generated output file ({len(docs)} documents): {output_file}")
 
 
 def autodetect_ner_format(input_data):
@@ -129,3 +102,102 @@
     if format_guesses["ner"] == 0 and format_guesses["iob"] > 0:
         return "iob"
     return None
+
+
+def walk_directory(path):
+    if not path.is_dir():
+        return [path]
+    paths = [path]
+    locs = []
+    seen = set()
+    for path in paths:
+        if str(path) in seen:
+            continue
+        seen.add(str(path))
+        if path.parts[-1].startswith("."):
+            continue
+        elif path.is_dir():
+            paths.extend(path.iterdir())
+        else:
+            locs.append(path)
+    return locs
+
+
+def verify_cli_args(
+    msg,
+    input_path,
+    output_dir,
+    file_type,
+    n_sents,
+    seg_sents,
+    model,
+    morphology,
+    merge_subtokens,
+    converter,
+    ner_map,
+    lang,
+):
+    if converter == "ner" or converter == "iob":
+        input_data = input_path.open("r", encoding="utf-8").read()
+        converter_autodetect = autodetect_ner_format(input_data)
+        if converter_autodetect == "ner":
+            msg.info("Auto-detected token-per-line NER format")
+            converter = converter_autodetect
+        elif converter_autodetect == "iob":
+            msg.info("Auto-detected sentence-per-line NER format")
+            converter = converter_autodetect
+        else:
+            msg.warn(
+                "Can't automatically detect NER format. Conversion may not "
+                "succeed. See https://spacy.io/api/cli#convert"
+            )
+    if file_type not in FILE_TYPES_STDOUT and output_dir == "-":
+        # TODO: support msgpack via stdout in srsly?
+        msg.fail(
+            f"Can't write .{file_type} data to stdout",
+            "Please specify an output directory.",
+            exits=1,
+        )
+    if not input_path.exists():
+        msg.fail("Input file not found", input_path, exits=1)
+    if output_dir != "-" and not Path(output_dir).exists():
+        msg.fail("Output directory not found", output_dir, exits=1)
+    if input_path.is_dir():
+        input_locs = walk_directory(input_path)
+        if len(input_locs) == 0:
+            msg.fail("No input files in directory", input_path, exits=1)
+        file_types = list(set([loc.suffix[1:] for loc in input_locs]))
+        if len(file_types) >= 2:
+            file_types = ",".join(file_types)
+            msg.fail("All input files must be same type", file_types, exits=1)
+        if converter == "auto":
+            converter = file_types[0]
+    elif converter == "auto":
+        converter = input_path.suffix[1:]
+    if converter not in CONVERTERS:
+        msg.fail(f"Can't find converter for {converter}", exits=1)
+    return converter
+
+
+def _get_converter(msg, converter, input_path):
+    if input_path.is_dir():
+        input_path = walk_directory(input_path)[0]
+    if converter == "auto":
+        converter = input_path.suffix[1:]
+    if converter == "ner" or converter == "iob":
+        with input_path.open() as file_:
+            input_data = file_.read()
+        converter_autodetect = autodetect_ner_format(input_data)
+        if converter_autodetect == "ner":
+            msg.info("Auto-detected token-per-line NER format")
+            converter = converter_autodetect
+        elif converter_autodetect == "iob":
+            msg.info("Auto-detected sentence-per-line NER format")
+            converter = converter_autodetect
+        else:
+            msg.warn(
+                "Can't automatically detect NER format. "
+                "Conversion may not succeed. "
+                "See https://spacy.io/api/cli#convert"
+            )
+    return converter
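For reference, the reworked per-file loop in `convert()` is roughly equivalent to the standalone sketch below. The directory names and the `.spacy` suffix are illustrative, not taken from the diff; only the `json2docs`/`DocBin` calls come from this branch:

```python
from pathlib import Path

from spacy.gold.converters import json2docs
from spacy.tokens import DocBin

input_path = Path("corpus_src")   # hypothetical input directory
output_dir = Path("corpus_out")   # hypothetical output directory

for input_loc in input_path.glob("**/*.json"):
    docs = json2docs(input_loc.read_text(encoding="utf-8"))
    # Mirror the input tree under output_dir, as convert() now does
    output_file = (output_dir / input_loc.relative_to(input_path)).with_suffix(".spacy")
    output_file.parent.mkdir(parents=True, exist_ok=True)
    # DocBin bytes are the new binary serialization target
    output_file.write_bytes(DocBin(docs=docs).to_bytes())
```

Writing one output file per input file is what lets the `relative_to()`/`with_suffix()` lines in the diff preserve the input directory structure.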
diff --git a/spacy/cli/converters/__init__.py b/spacy/cli/converters/__init__.py
deleted file mode 100644
index e44ad407d..000000000
--- a/spacy/cli/converters/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .conllu2json import conllu2json  # noqa: F401
-from .iob2json import iob2docs  # noqa: F401
-from .conll_ner2json import conll_ner2json  # noqa: F401
-from .jsonl2docs import ner_jsonl2json  # noqa: F401
diff --git a/spacy/cli/converters/jsonl2json.py b/spacy/cli/converters/jsonl2json.py
deleted file mode 100644
index 8639a11b9..000000000
--- a/spacy/cli/converters/jsonl2json.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import srsly
-
-from ...gold import docs_to_json
-from ...util import get_lang_class, minibatch
-
-
-def ner_jsonl2docs(input_data, lang=None, n_sents=10, use_morphology=False, **_):
-    if lang is None:
-        raise ValueError("No --lang specified, but tokenization required")
-    docs = []
-    input_examples = [srsly.json_loads(line) for line in input_data.strip().split("\n")]
-    nlp = get_lang_class(lang)()
-    sentencizer = nlp.create_pipe("sentencizer")
-    for i, batch in enumerate(minibatch(input_examples, size=n_sents)):
-        docs = []
-        # TODO: Should we be merging these? We're disrespecting the n_sents
-        # currently.
-        for record in batch:
-            raw_text = record["text"]
-            if "entities" in record:
-                ents = record["entities"]
-            else:
-                ents = record["spans"]
-            ents = [(e["start"], e["end"], e["label"]) for e in ents]
-            doc = nlp.make_doc(raw_text)
-            sentencizer(doc)
-            spans = [doc.char_span(s, e, label=L) for s, e, L in ents]
-            doc.ents = _cleanup_spans(spans)
-            docs.append(doc)
-    return docs
-
-
-def _cleanup_spans(spans):
-    output = []
-    seen = set()
-    for span in spans:
-        if span is not None:
-            # Trim whitespace
-            while len(span) and span[0].is_space:
-                span = span[1:]
-            while len(span) and span[-1].is_space:
-                span = span[:-1]
-            if not len(span):
-                continue
-            for i in range(span.start, span.end):
-                if i in seen:
-                    break
-            else:
-                output.append(span)
-                seen.update(range(span.start, span.end))
-    return output
diff --git a/spacy/gold/converters/__init__.py b/spacy/gold/converters/__init__.py
new file mode 100644
index 000000000..0a1242fb4
--- /dev/null
+++ b/spacy/gold/converters/__init__.py
@@ -0,0 +1,6 @@
+from .iob2docs import iob2docs  # noqa: F401
+from .conll_ner2docs import conll_ner2docs  # noqa: F401
+from .json2docs import json2docs  # noqa: F401
+
+# TODO: Update this one
+# from .conllu2docs import conllu2docs  # noqa: F401
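Because the converters now live in `spacy.gold.converters` and return `Doc` objects rather than JSON-serializable dicts, they can be called directly. A minimal sketch, assuming `iob2docs` can build docs without a model (its signature takes no vocab argument) and using the `token|tag|entity` IOB convention; the sample string is illustrative:

```python
from spacy.gold.converters import iob2docs

# One sentence per line; each token as token|pos|iob-entity
iob_sample = "I|PRP|O like|VBP|O London|NNP|B-GPE .|.|O"
docs = iob2docs(iob_sample, n_sents=1)
for doc in docs:
    print([(ent.text, ent.label_) for ent in doc.ents])
```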
diff --git a/spacy/cli/converters/conll_ner2json.py b/spacy/gold/converters/conll_ner2docs.py
similarity index 99%
rename from spacy/cli/converters/conll_ner2json.py
rename to spacy/gold/converters/conll_ner2docs.py
index 8d4139bde..7042bd7d6 100644
--- a/spacy/cli/converters/conll_ner2json.py
+++ b/spacy/gold/converters/conll_ner2docs.py
@@ -7,7 +7,7 @@ from ...vocab import Vocab
 from ...util import load_model
 
 
-def conll_ner2doc(
+def conll_ner2docs(
     input_data, n_sents=10, seg_sents=False, model=None, no_print=False, **kwargs
 ):
     """
diff --git a/spacy/cli/converters/conllu2json.py b/spacy/gold/converters/conllu2json.py
similarity index 100%
rename from spacy/cli/converters/conllu2json.py
rename to spacy/gold/converters/conllu2json.py
diff --git a/spacy/cli/converters/iob2json.py b/spacy/gold/converters/iob2docs.py
similarity index 97%
rename from spacy/cli/converters/iob2json.py
rename to spacy/gold/converters/iob2docs.py
index 2addc1af4..7901569fa 100644
--- a/spacy/cli/converters/iob2json.py
+++ b/spacy/gold/converters/iob2docs.py
@@ -3,7 +3,7 @@ from wasabi import Printer
 from ...gold import iob_to_biluo, tags_to_entities
 from ...util import minibatch
 from .util import merge_sentences
-from .conll_ner2json import n_sents_info
+from .conll_ner2docs import n_sents_info
 
 
 def iob2docs(input_data, n_sents=10, no_print=False, *args, **kwargs):
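The two line-based formats that `autodetect_ner_format()` distinguishes: "ner" is token-per-line CoNLL-style data, "iob" is sentence-per-line. A sketch of the two shapes; the samples are illustrative and the expected results are in comments rather than asserted, since the detection is heuristic:

```python
from spacy.cli.convert import autodetect_ner_format

# Token-per-line, CoNLL-style ("ner"): one token and tag per line,
# blank line between sentences
conll_sample = "London B-GPE\nis O\ngreat O\n. O\n\nI O\nlike O\nit O\n. O\n"

# Sentence-per-line ("iob"): whole sentence on one line, token|tag|entity
iob_sample = "London|NNP|B-GPE is|VBZ|O great|JJ|O .|.|O"

print(autodetect_ner_format(conll_sample))  # expected: "ner"
print(autodetect_ner_format(iob_sample))    # expected: "iob"
```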
" + "See https://spacy.io/api/cli#convert" + ) + return converter diff --git a/spacy/cli/converters/__init__.py b/spacy/cli/converters/__init__.py deleted file mode 100644 index e44ad407d..000000000 --- a/spacy/cli/converters/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .conllu2json import conllu2json # noqa: F401 -from .iob2json import iob2docs # noqa: F401 -from .conll_ner2json import conll_ner2json # noqa: F401 -from .jsonl2docs import ner_jsonl2json # noqa: F401 diff --git a/spacy/cli/converters/jsonl2json.py b/spacy/cli/converters/jsonl2json.py deleted file mode 100644 index 8639a11b9..000000000 --- a/spacy/cli/converters/jsonl2json.py +++ /dev/null @@ -1,51 +0,0 @@ -import srsly - -from ...gold import docs_to_json -from ...util import get_lang_class, minibatch - - -def ner_jsonl2docs(input_data, lang=None, n_sents=10, use_morphology=False, **_): - if lang is None: - raise ValueError("No --lang specified, but tokenization required") - docs = [] - input_examples = [srsly.json_loads(line) for line in input_data.strip().split("\n")] - nlp = get_lang_class(lang)() - sentencizer = nlp.create_pipe("sentencizer") - for i, batch in enumerate(minibatch(input_examples, size=n_sents)): - docs = [] - # TODO: Should we be merging these? We're disrespecting the n_sents - # currently. - for record in batch: - raw_text = record["text"] - if "entities" in record: - ents = record["entities"] - else: - ents = record["spans"] - ents = [(e["start"], e["end"], e["label"]) for e in ents] - doc = nlp.make_doc(raw_text) - sentencizer(doc) - spans = [doc.char_span(s, e, label=L) for s, e, L in ents] - doc.ents = _cleanup_spans(spans) - docs.append(doc) - return docs - - -def _cleanup_spans(spans): - output = [] - seen = set() - for span in spans: - if span is not None: - # Trim whitespace - while len(span) and span[0].is_space: - span = span[1:] - while len(span) and span[-1].is_space: - span = span[:-1] - if not len(span): - continue - for i in range(span.start, span.end): - if i in seen: - break - else: - output.append(span) - seen.update(range(span.start, span.end)) - return output diff --git a/spacy/gold/converters/__init__.py b/spacy/gold/converters/__init__.py new file mode 100644 index 000000000..0a1242fb4 --- /dev/null +++ b/spacy/gold/converters/__init__.py @@ -0,0 +1,6 @@ +from .iob2docs import iob2docs # noqa: F401 +from .conll_ner2docs import conll_ner2docs # noqa: F401 +from .json2docs import json2docs + +# TODO: Update this one +#from .conllu2docs import conllu2docs # noqa: F401 diff --git a/spacy/cli/converters/conll_ner2json.py b/spacy/gold/converters/conll_ner2docs.py similarity index 99% rename from spacy/cli/converters/conll_ner2json.py rename to spacy/gold/converters/conll_ner2docs.py index 8d4139bde..7042bd7d6 100644 --- a/spacy/cli/converters/conll_ner2json.py +++ b/spacy/gold/converters/conll_ner2docs.py @@ -7,7 +7,7 @@ from ...vocab import Vocab from ...util import load_model -def conll_ner2doc( +def conll_ner2docs( input_data, n_sents=10, seg_sents=False, model=None, no_print=False, **kwargs ): """ diff --git a/spacy/cli/converters/conllu2json.py b/spacy/gold/converters/conllu2json.py similarity index 100% rename from spacy/cli/converters/conllu2json.py rename to spacy/gold/converters/conllu2json.py diff --git a/spacy/cli/converters/iob2json.py b/spacy/gold/converters/iob2docs.py similarity index 97% rename from spacy/cli/converters/iob2json.py rename to spacy/gold/converters/iob2docs.py index 2addc1af4..7901569fa 100644 --- a/spacy/cli/converters/iob2json.py +++ 
diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index 7bf3faab3..3072787ae 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -9,6 +9,19 @@ from ..attrs import SPACY, ORTH, intify_attr
 from ..errors import Errors
 
 
+ALL_ATTRS = (
+    "ORTH",
+    "TAG",
+    "HEAD",
+    "DEP",
+    "SENT_START",
+    "ENT_IOB",
+    "ENT_TYPE",
+    "LEMMA",
+    "MORPH",
+)
+
+
 class DocBin(object):
     """Pack Doc objects for binary serialization.
@@ -39,7 +52,7 @@ class DocBin(object):
     document from the DocBin.
     """
 
-    def __init__(self, attrs=None, store_user_data=False, docs=[]):
+    def __init__(self, attrs=ALL_ATTRS, store_user_data=False, docs=[]):
         """Create a DocBin object to hold serialized annotations.
 
         attrs (list): List of attributes to serialize. 'orth' and 'spacy' are
@@ -49,7 +62,6 @@ class DocBin(object):
 
         DOCS: https://spacy.io/api/docbin#init
         """
-        attrs = attrs or []
         attrs = sorted([intify_attr(attr) for attr in attrs])
         self.attrs = [attr for attr in attrs if attr != ORTH and attr != SPACY]
         self.attrs.insert(0, ORTH)  # Ensure ORTH is always attrs[0]
@@ -60,7 +72,7 @@ class DocBin(object):
         self.strings = set()
         self.store_user_data = store_user_data
         for doc in docs:
-            self.add(docs)
+            self.add(doc)
 
     def __len__(self):
         """RETURNS: The number of Doc objects added to the DocBin."""
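With `ALL_ATTRS` as the default, a `DocBin` now preserves tags, heads, deps, and entities without the caller listing attributes explicitly. A quick round-trip sketch using the public `DocBin` API; the sentence and tag value are illustrative:

```python
import spacy
from spacy.tokens import DocBin

nlp = spacy.blank("en")
doc = nlp("Apple is looking at buying a startup")
doc[0].tag_ = "NNP"  # illustrative annotation

doc_bin = DocBin()  # now defaults to ALL_ATTRS
doc_bin.add(doc)
data = doc_bin.to_bytes()

# Deserialize into fresh Doc objects against the same vocab
restored = list(DocBin().from_bytes(data).get_docs(nlp.vocab))
print(restored[0][0].tag_)  # the tag survives the round trip
```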