From 271923eaea508b6640e1fe75b755a9a071ee24a3 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Wed, 9 Dec 2020 11:29:55 +1100
Subject: [PATCH] Fix retokenizer: set NORM only on tokens, not lexemes

---
 spacy/tokens/_retokenize.pyx | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/spacy/tokens/_retokenize.pyx b/spacy/tokens/_retokenize.pyx
index ea65c464a..0069e36bf 100644
--- a/spacy/tokens/_retokenize.pyx
+++ b/spacy/tokens/_retokenize.pyx
@@ -11,7 +11,7 @@ from .span cimport Span
 from .token cimport Token
 from ..lexeme cimport Lexeme, EMPTY_LEXEME
 from ..structs cimport LexemeC, TokenC
-from ..attrs cimport MORPH
+from ..attrs cimport MORPH, NORM
 from ..vocab cimport Vocab
 
 from .underscore import is_writable_attr
@@ -436,6 +436,7 @@ def set_token_attrs(Token py_token, attrs):
             # Set attributes on both token and lexeme to take care of token
             # attribute vs. lexical attribute without having to enumerate
             # them. If an attribute name is not valid, set_struct_attr will
-            # ignore it.
+            # ignore it. Exception: set NORM only on tokens.
             Token.set_struct_attr(token, attr_name, attr_value)
-            Lexeme.set_struct_attr(<LexemeC*>lex, attr_name, attr_value)
+            if attr_name != NORM:
+                Lexeme.set_struct_attr(<LexemeC*>lex, attr_name, attr_value)