diff --git a/spacy/default_config.cfg b/spacy/default_config.cfg
index 6bd1ed24d..d7fc46ea0 100644
--- a/spacy/default_config.cfg
+++ b/spacy/default_config.cfg
@@ -2,7 +2,6 @@
 train = null
 dev = null
 vectors = null
-vocab_data = null
 init_tok2vec = null
 
 [system]
@@ -11,8 +10,13 @@ gpu_allocator = null
 
 [nlp]
 lang = null
+# List of pipeline component names, in order. The names should correspond to
+# components defined in the [components] block
 pipeline = []
+# Components that are loaded but disabled by default
 disabled = []
+# Optional callbacks to modify the nlp object before it's initialized, after
+# it's created and after the pipeline has been set up
 before_creation = null
 after_creation = null
 after_pipeline_creation = null
@@ -20,6 +24,7 @@ after_pipeline_creation = null
 [nlp.tokenizer]
 @tokenizers = "spacy.Tokenizer.v1"
 
+# The pipeline components and their models
 [components]
 
 # Readers for corpora like dev and train.
@@ -38,8 +43,7 @@ max_length = 0
 limit = 0
 # Apply some simply data augmentation, where we replace tokens with variations.
 # This is especially useful for punctuation and case replacement, to help
-# generalize beyond corpora that don't have smart-quotes, or only have smart
-# quotes, etc.
+# generalize beyond corpora that don't/only have smart quotes etc.
 augmenter = null
 
 [corpora.dev]
@@ -53,6 +57,7 @@ gold_preproc = false
 max_length = 0
 # Limitation on number of training examples
 limit = 0
+# Optional callback for data augmentation
 augmenter = null
 
 # Training hyper-parameters and additional features.
@@ -102,17 +107,18 @@ use_averages = false
 eps = 1e-8
 learn_rate = 0.001
 
-# The 'initialize' step is run before training or pretraining. Components and
-# the tokenizer can each define their own arguments via their .initialize
-# methods that are populated by the config. This lets them gather resources like
-# lookup tables and build label sets, construct vocabularies, etc.
+# These settings are used when nlp.initialize() is called (typically before
+# training or pretraining). Components and the tokenizer can each define their
+# own arguments via their initialize methods that are populated by the config.
+# This lets them gather data resources, build label sets etc.
 [initialize]
-vocab_data = ${paths.vocab_data}
-lookups = null
 vectors = ${paths.vectors}
 # Extra resources for transfer-learning or pseudo-rehearsal
 init_tok2vec = ${paths.init_tok2vec}
+# Data and lookups for vocabulary
+vocab_data = null
+lookups = null
 # Arguments passed to the tokenizer's initialize method
 tokenizer = {}
-# Arguments passed to the initialize methods of the components (keyed by component name)
+# Arguments for initialize methods of the components (keyed by component)
 components = {}
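
For reviewers, a minimal sketch (not part of the diff) of how the newly documented [nlp] settings relate to the [components] block in a user config; the component names "tok2vec" and "ner" are illustrative:

    [nlp]
    lang = "en"
    # Each name must match a block under [components]
    pipeline = ["tok2vec","ner"]
    # "ner" is loaded but skipped when the pipeline runs
    disabled = ["ner"]

    [components.tok2vec]
    factory = "tok2vec"

    [components.ner]
    factory = "ner"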
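
Likewise, a hedged sketch of filling in the optional augmenter callback on [corpora.train], assuming the built-in "spacy.orth_variants.v1" augmenter; the level values are illustrative, not defaults:

    [corpora.train.augmenter]
    @augmenters = "spacy.orth_variants.v1"
    # Replace tokens with case/punctuation variants in ~10% of examples,
    # lower-casing about half of the augmented ones
    level = 0.1
    lower = 0.5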
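
Finally, a sketch of how the relocated [initialize] settings might be filled in by a user config; the paths and the "spacy.read_labels.v1" reader block for a "ner" component are assumptions for illustration, not part of this diff:

    [initialize]
    # JSONL file with lexical attributes for the vocabulary (hypothetical path)
    vocab_data = "assets/vocab_data.jsonl"

    [initialize.components.ner]

    # Pre-computed label set for the ner component (hypothetical path)
    [initialize.components.ner.labels]
    @readers = "spacy.read_labels.v1"
    path = "corpus/labels/ner.json"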