diff --git a/setup.cfg b/setup.cfg
index 4d0a88c35..bcb85eef3 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -53,7 +53,7 @@ install_requires =
 
 [options.extras_require]
 lookups =
-    spacy_lookups_data>=0.0.4<0.2.0
+    spacy_lookups_data>=0.0.5<0.2.0
 cuda =
     thinc_gpu_ops>=0.0.1,<0.1.0
     cupy>=5.0.0b4
diff --git a/spacy/errors.py b/spacy/errors.py
index 2ef5d1ce4..51565ade6 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -324,7 +324,9 @@ class Errors(object):
     E101 = ("NODE_NAME should be a new node and NBOR_NAME should already have "
             "have been declared in previous edges.")
     E102 = ("Can't merge non-disjoint spans. '{token}' is already part of "
-            "tokens to merge.")
+            "tokens to merge. If you want to find the longest non-overlapping "
+            "spans, you can use the util.filter_spans helper:\n"
+            "https://spacy.io/api/top-level#util.filter_spans")
     E103 = ("Trying to set conflicting doc.ents: '{span1}' and '{span2}'. A "
             "token can only be part of one entity, so make sure the entities "
             "you're setting don't overlap.")
diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md
index 4128fa73f..e593731d4 100644
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@@ -1086,6 +1086,14 @@ with doc.retokenize() as retokenizer:
 print("After:", [token.text for token in doc])
 ```
 
+> #### Tip: merging entities and noun phrases
+>
+> If you need to merge named entities or noun chunks, check out the built-in
+> [`merge_entities`](/api/pipeline-functions#merge_entities) and
+> [`merge_noun_chunks`](/api/pipeline-functions#merge_noun_chunks) pipeline
+> components. When added to your pipeline using `nlp.add_pipe`, they'll take
+> care of merging the spans automatically.
+
 If an attribute in the `attrs` is a context-dependent token attribute, it will
 be applied to the underlying [`Token`](/api/token). For example `LEMMA`, `POS`
 or `DEP` only apply to a word in context, so they're token attributes. If an
@@ -1094,16 +1102,24 @@ underlying [`Lexeme`](/api/lexeme), the entry in the vocabulary. For example,
 `LOWER` or `IS_STOP` apply to all words of the same spelling, regardless of the
 context.
 
-
+
 
-If you need to merge named entities or noun chunks, check out the built-in
-[`merge_entities`](/api/pipeline-functions#merge_entities) and
-[`merge_noun_chunks`](/api/pipeline-functions#merge_noun_chunks) pipeline
-components. When added to your pipeline using `nlp.add_pipe`, they'll take care
-of merging the spans automatically.
+If you're trying to merge spans that overlap, spaCy will raise an error because
+it's unclear how the result should look. Depending on the application, you may
+want to match the shortest or longest possible span, so it's up to you to filter
+them. If you're looking for the longest non-overlapping span, you can use the
+[`util.filter_spans`](/api/top-level#util.filter_spans) helper:
+
+```python
+doc = nlp("I live in Berlin Kreuzberg")
+spans = [doc[3:5], doc[3:4], doc[4:5]]
+filtered_spans = filter_spans(spans)
+```
 
 
 
+### Splitting tokens
+
 The [`retokenizer.split`](/api/doc#retokenizer.split) method allows splitting
 one token into two or more tokens. This can be useful for cases where
 tokenization rules alone aren't sufficient. For example, you might want to split
@@ -1168,7 +1184,7 @@ with doc.retokenize() as retokenizer:
 
 
 When splitting tokens, the subtoken texts always have to match the original
-token text – or, put differently `''.join(subtokens) == token.text` always needs
+token text – or, put differently `"".join(subtokens) == token.text` always needs
 to hold true. If this wasn't the case, splitting tokens could easily end up
 producing confusing and unexpected results that would contradict spaCy's
 non-destructive tokenization policy.
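
For reference, a minimal sketch (not part of the patch) of the behaviour the updated E102 message and docs describe: merging overlapping spans directly raises the error, while `util.filter_spans` keeps only the longest non-overlapping spans. It assumes spaCy v2.1.4 or later, where `spacy.util.filter_spans` is available; the blank pipeline and variable names are illustrative.

```python
# Illustrative sketch: resolving overlapping spans with util.filter_spans
# before merging (assumes spaCy v2.1.4+, where filter_spans is available).
import spacy
from spacy.util import filter_spans

nlp = spacy.blank("en")
doc = nlp("I live in Berlin Kreuzberg")

# Overlapping candidates: "Berlin Kreuzberg", "Berlin", "Kreuzberg".
spans = [doc[3:5], doc[3:4], doc[4:5]]

# Merging these directly would raise E102, since the spans are not disjoint.
# filter_spans keeps the longest spans and drops any that overlap with them.
filtered_spans = filter_spans(spans)  # -> [Berlin Kreuzberg]

with doc.retokenize() as retokenizer:
    for span in filtered_spans:
        retokenizer.merge(span)

print([token.text for token in doc])  # ['I', 'live', 'in', 'Berlin Kreuzberg']
```

Note that `filter_spans` prefers longer spans and, for spans of equal length, the one that starts earlier.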