diff --git a/spacy/tests/doc/test_retokenize_merge.py b/spacy/tests/doc/test_retokenize_merge.py
index 636b7bb14..6d4c436d6 100644
--- a/spacy/tests/doc/test_retokenize_merge.py
+++ b/spacy/tests/doc/test_retokenize_merge.py
@@ -414,6 +414,13 @@ def test_doc_retokenizer_merge_lex_attrs(en_vocab):
     assert doc[1].is_stop
     assert not doc[0].is_stop
     assert not doc[1].like_num
+    # Test that norm is only set on tokens
+    doc = Doc(en_vocab, words=["eins", "zwei", "!", "!"])
+    assert doc[0].norm_ == "eins"
+    with doc.retokenize() as retokenizer:
+        retokenizer.merge(doc[0:1], attrs={"norm": "1"})
+    assert doc[0].norm_ == "1"
+    assert en_vocab["eins"].norm_ == "eins"
 
 
 def test_retokenize_skip_duplicates(en_vocab):
diff --git a/spacy/tokens/_retokenize.pyx b/spacy/tokens/_retokenize.pyx
index 4a030bef6..485e52304 100644
--- a/spacy/tokens/_retokenize.pyx
+++ b/spacy/tokens/_retokenize.pyx
@@ -16,7 +16,7 @@
 from .span cimport Span
 from .token cimport Token
 from ..lexeme cimport Lexeme, EMPTY_LEXEME
 from ..structs cimport LexemeC, TokenC
-from ..attrs cimport TAG
+from ..attrs cimport TAG, NORM
 from .underscore import is_writable_attr
 from ..attrs import intify_attrs
@@ -238,9 +238,10 @@ def _merge(Doc doc, merges):
                 # Set attributes on both token and lexeme to take care of token
                 # attribute vs. lexical attribute without having to enumerate
                 # them. If an attribute name is not valid, set_struct_attr will
-                # ignore it.
+                # ignore it. Exception: set NORM only on tokens.
                 Token.set_struct_attr(token, attr_name, attr_value)
-                Lexeme.set_struct_attr(lex, attr_name, attr_value)
+                if attr_name != NORM:
+                    Lexeme.set_struct_attr(lex, attr_name, attr_value)
     # Begin by setting all the head indices to absolute token positions
     # This is easier to work with for now than the offsets
     # Before thinking of something simpler, beware the case where a
@@ -393,9 +394,10 @@ def _split(Doc doc, int token_index, orths, heads, attrs):
                 # Set attributes on both token and lexeme to take care of token
                 # attribute vs. lexical attribute without having to enumerate
                 # them. If an attribute name is not valid, set_struct_attr will
-                # ignore it.
+                # ignore it. Exception: set NORM only on tokens.
                 Token.set_struct_attr(token, attr_name, get_string_id(attr_value))
-                Lexeme.set_struct_attr(token.lex, attr_name, get_string_id(attr_value))
+                if attr_name != NORM:
+                    Lexeme.set_struct_attr(token.lex, attr_name, get_string_id(attr_value))
     # Assign correct dependencies to the inner token
     for i, head in enumerate(heads):
         doc.c[token_index + i].head = head