diff --git a/spacy/default_config.cfg b/spacy/default_config.cfg
index 86293fd40..c0fd27c3c 100644
--- a/spacy/default_config.cfg
+++ b/spacy/default_config.cfg
@@ -1,8 +1,9 @@
 [paths]
 train = ""
 dev = ""
-init_tok2vec = null
+vectors = null
 vocab_data = null
+init_tok2vec = null
 
 [system]
 seed = 0
@@ -96,19 +97,16 @@
 eps = 1e-8
 learn_rate = 0.001
 # The 'initialize' step is run before training or pretraining. Components and
-# the tokenizer can each define their own prepare step, giving them a chance
-# to gather resources like lookup-tables, build label sets, construct vocabularies,
-# etc. After 'prepare' is finished, the result will be saved out to disk, which
-# will then be read in at the start of training. You can call the prepare step
-# separately with the `spacy prepare` command, or you can let the train script
-# do it for you.
+# the tokenizer can each define their own arguments via their .initialize
+# methods that are populated by the config. This lets them gather resources like
+# lookup tables, build label sets, construct vocabularies, etc.
 [initialize]
-tokenizer = {}
-components = {}
-
-[initialize.vocab]
-data = ${paths.vocab_data}
+vocab_data = ${paths.vocab_data}
 lookups = null
-vectors = null
+vectors = ${paths.vectors}
 # Extra resources for transfer-learning or pseudo-rehearsal
 init_tok2vec = ${paths.init_tok2vec}
+# Arguments passed to the tokenizer's initialize method
+tokenizer = {}
+# Arguments passed to the initialize methods of the components (keyed by component name)
+components = {}
diff --git a/spacy/language.py b/spacy/language.py
index ec2e42a35..ee73faed3 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1188,14 +1188,13 @@ class Language:
         config = self.config.interpolate()
         # These are the settings provided in the [initialize] block in the config
         I = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
-        V = I["vocab"]
         init_vocab(
-            self, data=V["data"], lookups=V["lookups"], vectors=V["vectors"],
+            self, data=I["vocab_data"], lookups=I["lookups"], vectors=I["vectors"],
         )
         pretrain_cfg = config.get("pretraining")
         if pretrain_cfg:
             P = registry.resolve(pretrain_cfg, schema=ConfigSchemaPretrain)
-            init_tok2vec(self, P, V)
+            init_tok2vec(self, P, I)
         if self.vocab.vectors.data.shape[1] >= 1:
             ops = get_current_ops()
             self.vocab.vectors.data = ops.asarray(self.vocab.vectors.data)
diff --git a/spacy/schemas.py b/spacy/schemas.py
index 0b2eeba68..658eeb574 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -357,12 +357,14 @@ class ConfigSchemaPretrain(BaseModel):
         arbitrary_types_allowed = True
 
 
-class ConfigSchemaInitVocab(BaseModel):
+class ConfigSchemaInit(BaseModel):
     # fmt: off
-    data: Optional[StrictStr] = Field(..., title="Path to JSON-formatted vocabulary file")
+    vocab_data: Optional[StrictStr] = Field(..., title="Path to JSON-formatted vocabulary file")
     lookups: Optional[Lookups] = Field(..., title="Vocabulary lookups, e.g. lexeme normalization")
     vectors: Optional[StrictStr] = Field(..., title="Path to vectors")
     init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights")
+    tokenizer: Dict[StrictStr, Any] = Field(..., title="Arguments to be passed into Tokenizer.initialize")
+    components: Dict[StrictStr, Dict[StrictStr, Any]] = Field(..., title="Arguments for Pipe.initialize methods of pipeline components, keyed by component")
     # fmt: on
 
     class Config:
@@ -370,16 +372,6 @@ class ConfigSchemaInitVocab(BaseModel):
         arbitrary_types_allowed = True
 
 
-class ConfigSchemaInit(BaseModel):
-    vocab: ConfigSchemaInitVocab
-    tokenizer: Any
-    components: Dict[StrictStr, Any]
-
-    class Config:
-        extra = "forbid"
-        arbitrary_types_allowed = True
-
-
 class ConfigSchema(BaseModel):
     training: ConfigSchemaTraining
     nlp: ConfigSchemaNlp
diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py
index 862c76448..aa5edde5d 100644
--- a/spacy/training/initialize.py
+++ b/spacy/training/initialize.py
@@ -121,15 +121,15 @@ def load_vectors_into_model(
 
 
 def init_tok2vec(
-    nlp: "Language", pretrain_config: Dict[str, Any], vocab_config: Dict[str, Any]
+    nlp: "Language", pretrain_config: Dict[str, Any], init_config: Dict[str, Any]
 ) -> bool:
     # Load pretrained tok2vec weights - cf. CLI command 'pretrain'
     P = pretrain_config
-    V = vocab_config
+    I = init_config
     weights_data = None
-    init_tok2vec = ensure_path(V["init_tok2vec"])
+    init_tok2vec = ensure_path(I["init_tok2vec"])
     if init_tok2vec is not None:
-        if P["objective"].get("type") == "vectors" and not V["vectors"]:
-            err = 'need initialize.vocab.vectors if pretraining.objective.type is "vectors"'
-            errors = [{"loc": ["initialize", "vocab"], "msg": err}]
+        if P["objective"].get("type") == "vectors" and not I["vectors"]:
+            err = 'need initialize.vectors if pretraining.objective.type is "vectors"'
+            errors = [{"loc": ["initialize"], "msg": err}]
             raise ConfigValidationError(config=nlp.config, errors=errors)
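
For reference, a minimal sketch of how a user config could fill in the flattened `[initialize]` block after this change. The `ner` component and its `labels` value are hypothetical placeholders, not part of this diff; the point is that anything under `[initialize.components.<name>]` is resolved by `ConfigSchemaInit` and forwarded to that component's `.initialize` method.

```ini
[initialize]
vocab_data = null
lookups = null
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}

# Arguments forwarded to Tokenizer.initialize
[initialize.tokenizer]

# Per-component arguments, keyed by component name
[initialize.components]

# Hypothetical: passed to the "ner" component's .initialize method
[initialize.components.ner]
labels = ["PERSON", "ORG"]
```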