From 71954d5fe7b9e98e34f894af04df32eafbf56147 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 10:32:53 +0200
Subject: [PATCH 01/20] Update Thinc version

---
 requirements.txt | 2 +-
 setup.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 7cd5fba43..ae50be598 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ pathlib
 numpy>=1.7
 cymem>=1.30,<1.32
 preshed>=1.0.0,<2.0.0
-thinc>=6.7.1,<6.8.0
+thinc>=6.7.2,<6.8.0
 murmurhash>=0.28,<0.29
 plac<1.0.0,>=0.9.6
 six
diff --git a/setup.py b/setup.py
index a16b35748..c317c537f 100755
--- a/setup.py
+++ b/setup.py
@@ -191,7 +191,7 @@ def setup_package():
                 'murmurhash>=0.28,<0.29',
                 'cymem>=1.30,<1.32',
                 'preshed>=1.0.0,<2.0.0',
-                'thinc>=6.7.1,<6.8.0',
+                'thinc>=6.7.2,<6.8.0',
                 'plac<1.0.0,>=0.9.6',
                 'pip>=9.0.0,<10.0.0',
                 'six',

From 5109bba91018729952a3263418ad0f5ab114fce1 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 11:31:11 +0200
Subject: [PATCH 02/20] Port over fix from #1070

---
 spacy/tokens/doc.pyx | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 84b39d454..e22a35875 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -437,7 +437,8 @@ cdef class Doc:
         """
         def __get__(self):
             if 'sents' in self.user_hooks:
-                return self.user_hooks['sents'](self)
+                yield from self.user_hooks['sents'](self)
+                return
 
             if not self.is_parsed:
                 raise ValueError(

From 459a1e8470f244623804aea9bef13d394562d558 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 11:31:18 +0200
Subject: [PATCH 03/20] Fix whitespace

---
 spacy/tokens/doc.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index e22a35875..b2706ea6f 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -741,7 +741,7 @@ cdef class Doc:
         token.spacy = self.c[end-1].spacy
         for attr_name, attr_value in attributes.items():
             if attr_name == TAG:
-                self.vocab.morphology.assign_tag(token, attr_value) 
+                self.vocab.morphology.assign_tag(token, attr_value)
             else:
                 Token.set_struct_attr(token, attr_name, attr_value)
         # Begin by setting all the head indices to absolute token positions

From c60431357de50de6caada0802b514c6e618b6c2a Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 11:31:30 +0200
Subject: [PATCH 04/20] Port over docs typo corrections

---
 website/docs/api/doc.jade                     | 2 +-
 website/docs/usage/customizing-tokenizer.jade | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/website/docs/api/doc.jade b/website/docs/api/doc.jade
index 9b8392fcb..4228aed8f 100644
--- a/website/docs/api/doc.jade
+++ b/website/docs/api/doc.jade
@@ -64,7 +64,7 @@ p
     doc = nlp(u'Give it back! He pleaded.')
     assert doc[0].text == 'Give'
     assert doc[-1].text == '.'
-    span = doc[1:1]
+    span = doc[1:3]
     assert span.text == 'it back'
 
 +table(["Name", "Type", "Description"])
diff --git a/website/docs/usage/customizing-tokenizer.jade b/website/docs/usage/customizing-tokenizer.jade
index 5c9a9fd78..f56ce9fb1 100644
--- a/website/docs/usage/customizing-tokenizer.jade
+++ b/website/docs/usage/customizing-tokenizer.jade
@@ -141,7 +141,7 @@ p
                 else:
                     tokens.append(substring)
                     substring = ''
-            tokens.extend(suffixes)
+            tokens.extend(reversed(suffixes))
             return tokens
 
 p

From 70fbba7d085fb756c976021cebec9d0474b8e336 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 13:24:43 +0200
Subject: [PATCH 05/20] Clone Doc to never merge punctuation on original Doc

---
 spacy/displacy/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index b27370909..3bb0b8aec 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -65,12 +65,13 @@ def app(environ, start_response):
     return [res]
 
 
-def parse_deps(doc, options={}):
+def parse_deps(orig_doc, options={}):
     """Generate dependency parse in {'words': [], 'arcs': []} format.
 
     doc (Doc): Document do parse.
     RETURNS (dict): Generated dependency parse keyed by words and arcs.
     """
+    doc = Doc(orig_doc.vocab).from_bytes(orig_doc.to_bytes())
     if options.get('collapse_punct', True):
         spans = []
         for word in doc[:-1]:

From cc8c8617a4e078afcb6ed8de0235be505561dea1 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 13:24:56 +0200
Subject: [PATCH 06/20] Shut down displaCy server on KeyboardInterrupt

---
 spacy/displacy/__init__.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index 3bb0b8aec..8468720cd 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -56,7 +56,12 @@ def serve(docs, style='dep', page=True, minify=False, options={}, manual=False,
     render(docs, style=style, page=page, minify=minify, options=options, manual=manual)
     httpd = simple_server.make_server('0.0.0.0', port, app)
     prints("Using the '%s' visualizer" % style, title="Serving on port %d..." % port)
-    httpd.serve_forever()
+    try:
+        httpd.serve_forever()
+    except KeyboardInterrupt:
+        prints("Shutting down server on port %d." % port)
+    finally:
+        httpd.server_close()
 
 
 def app(environ, start_response):

From 32c6f05de91b8ae7a189bec4c4efb11f50d78947 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 13:25:32 +0200
Subject: [PATCH 07/20] Adjust spacing and sizing in compact mode

---
 spacy/displacy/render.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index e9b792881..1050ffa87 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -18,12 +18,11 @@ class DependencyRenderer(object):
                         offset_x, color, bg, font)
         """
         self.compact = options.get('compact', False)
-        distance, arrow_width = (85, 8) if self.compact else (175, 10)
         self.word_spacing = options.get('word_spacing', 45)
-        self.arrow_spacing = options.get('arrow_spacing', 20)
-        self.arrow_width = options.get('arrow_width', arrow_width)
+        self.arrow_spacing = options.get('arrow_spacing', 12 if self.compact else 20)
+        self.arrow_width = options.get('arrow_width', 6 if self.compact else 10)
         self.arrow_stroke = options.get('arrow_stroke', 2)
-        self.distance = options.get('distance', distance)
+        self.distance = options.get('distance', 150 if self.compact else 175)
         self.offset_x = options.get('offset_x', 50)
         self.color = options.get('color', '#000000')
         self.bg = options.get('bg', '#ffffff')
@@ -99,6 +98,8 @@ class DependencyRenderer(object):
         x_end = (self.offset_x+(end-start)*self.distance+start*self.distance
                  -self.arrow_spacing*(self.highest_level-level)/4)
         y_curve = self.offset_y-level*self.distance/2
+        if self.compact:
+            y_curve = self.offset_y-level*self.distance/6
         if y_curve == 0 and len(self.levels) > 5:
             y_curve = -self.distance
         arrowhead = self.get_arrowhead(direction, x_start, y, x_end)

From 82154a1861170538e4afe705baa285440ab30476 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 13:25:41 +0200
Subject: [PATCH 08/20] Add letter spacing to arrow label

---
 spacy/displacy/templates.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/displacy/templates.py b/spacy/displacy/templates.py
index 54df44489..2f6fc22de 100644
--- a/spacy/displacy/templates.py
+++ b/spacy/displacy/templates.py
@@ -21,7 +21,7 @@ TPL_DEP_WORDS = """
 TPL_DEP_ARCS = """
 <g class="displacy-arrow">
     <path class="displacy-arc" id="arrow-{id}-{i}" stroke-width="{stroke}px" d="{arc}" fill="none" stroke="currentColor"/>
-    <text dy="1.25em" style="font-size: 0.8em">
+    <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
         <textPath xlink:href="#arrow-{id}-{i}" class="displacy-label" startOffset="50%" fill="currentColor" text-anchor="middle">{label}</textPath>
     </text>
     <path class="displacy-arrowhead" d="{head}" fill="currentColor"/>

From 0153b66a861e023ba23dc0d23e6b5a0cc9ca0519 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 13:26:13 +0200
Subject: [PATCH 09/20] Return self in Tokenizer.from_bytes

---
 spacy/tokenizer.pyx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 20d2d7a47..a7067f69e 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -392,3 +392,4 @@ cdef class Tokenizer:
             self.token_match = re.compile(data['token_match']).search
         for string, substrings in data.get('rules', {}).items():
             self.add_special_case(string, substrings)
+        return self

From de974f7bef19dbddc046f07bb2a58b8afa3dba09 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 13:26:34 +0200
Subject: [PATCH 10/20] Add serializer tests for tokenizer

---
 .../serialize/test_serialize_tokenizer.py     | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 spacy/tests/serialize/test_serialize_tokenizer.py

diff --git a/spacy/tests/serialize/test_serialize_tokenizer.py b/spacy/tests/serialize/test_serialize_tokenizer.py
new file mode 100644
index 000000000..2e3d78c14
--- /dev/null
+++ b/spacy/tests/serialize/test_serialize_tokenizer.py
@@ -0,0 +1,25 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from ..util import make_tempdir
+
+import pytest
+
+
+@pytest.mark.parametrize('text', ["I can't do this"])
+def test_serialize_tokenizer_roundtrip_bytes(en_tokenizer, text):
+    tokenizer_b = en_tokenizer.to_bytes()
+    new_tokenizer = en_tokenizer.from_bytes(tokenizer_b)
+    assert new_tokenizer.to_bytes() == tokenizer_b
+    doc1 = en_tokenizer(text)
+    doc2 = new_tokenizer(text)
+    assert [token.text for token in doc1] == [token.text for token in doc2]
+
+
+def test_serialize_tokenizer_roundtrip_disk(en_tokenizer):
+    tokenizer = en_tokenizer  # NOTE(review): reloaded in place below
+    with make_tempdir() as d:
+        file_path = d / 'tokenizer'
+        tokenizer.to_disk(file_path)
+        tokenizer_d = en_tokenizer.from_disk(file_path)  # same fixture object
+        assert tokenizer.to_bytes() == tokenizer_d.to_bytes()

From b0225183c2487ac1a5ca617e2169b40b3c67bff7 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 13:27:06 +0200
Subject: [PATCH 11/20] Update displaCy defaults

---
 website/docs/api/displacy.jade | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/api/displacy.jade b/website/docs/api/displacy.jade
index 415fab77d..59fcca3ca 100644
--- a/website/docs/api/displacy.jade
+++ b/website/docs/api/displacy.jade
@@ -205,7 +205,7 @@ p
         +cell #[code arrow_spacing]
         +cell int
         +cell Spacing between arrows in px to avoid overlaps.
-        +cell #[code 20]
+        +cell #[code 20] / #[code 12] (compact)
 
     +row
         +cell #[code word_spacing]

From 9acf8686f7bcaae05ed7a411c8f3b2581dc093b7 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 13:31:16 +0200
Subject: [PATCH 12/20] Update note on compact mode issues

---
 website/docs/usage/visualizers.jade | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/website/docs/usage/visualizers.jade b/website/docs/usage/visualizers.jade
index b26fbc27a..62dc8e871 100644
--- a/website/docs/usage/visualizers.jade
+++ b/website/docs/usage/visualizers.jade
@@ -59,9 +59,11 @@ p
     |  to customise the layout, for example:
 
 +aside("Important note")
-    |  There's currently a known issue with the #[code compact] mode for long
-    |  sentences with arrow spacing. If the spacing is larger than the arc
-    |  itself, it'll cause the arc and its label to flip.
+    |  There's currently a known issue with the #[code compact] mode for
+    |  sentences with short arrows and long dependency labels, which causes
+    |  labels longer than the arrow to wrap. If you come across this problem,
+    |  especially when using custom labels, you'll have to increase the
+    |  #[code distance] setting in the #[code options] to allow longer arcs.
 
 +table(["Name", "Type", "Description", "Default"])
     +row

From 1ebd0d3f276d09a7de72a386d7b52808c3e6ce56 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 17:04:30 +0200
Subject: [PATCH 13/20] Add assert_packed_msg_equal util function

---
 spacy/tests/util.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/spacy/tests/util.py b/spacy/tests/util.py
index 7f8884235..56aeb5223 100644
--- a/spacy/tests/util.py
+++ b/spacy/tests/util.py
@@ -10,6 +10,7 @@ import numpy
 import tempfile
 import shutil
 import contextlib
+import msgpack
 from pathlib import Path
 
 
@@ -105,3 +106,13 @@ def assert_docs_equal(doc1, doc2):
     assert [ t.ent_type for t in doc1 ] == [ t.ent_type for t in doc2 ]
     assert [ t.ent_iob for t in doc1 ] == [ t.ent_iob for t in doc2 ]
     assert [ ent for ent in doc1.ents ] == [ ent for ent in doc2.ents ]
+
+
+def assert_packed_msg_equal(b1, b2):
+    """Assert that two packed msgpack messages unpack to equal mappings."""
+    msg1 = msgpack.loads(b1, encoding='utf8')
+    msg2 = msgpack.loads(b2, encoding='utf8')
+    assert sorted(msg1.keys()) == sorted(msg2.keys())  # same key set
+    for (k1, v1), (k2, v2) in zip(sorted(msg1.items()), sorted(msg2.items())):
+        assert k1 == k2
+        assert v1 == v2  # values compared pairwise after sorting the items

From 7c919aeb09eec6888d1b6918ff4421921b5cc90f Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 17:05:09 +0200
Subject: [PATCH 14/20] Make sure serializers and deserializers are ordered

---
 spacy/tokenizer.pyx | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index a7067f69e..de184baba 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -2,6 +2,7 @@
 # coding: utf8
 from __future__ import unicode_literals
 
+from collections import OrderedDict
 from cython.operator cimport dereference as deref
 from cython.operator cimport preincrement as preinc
 from cymem.cymem cimport Pool
@@ -355,14 +356,14 @@ cdef class Tokenizer:
         **exclude: Named attributes to prevent from being serialized.
         RETURNS (bytes): The serialized form of the `Tokenizer` object.
         """
-        serializers = {
-            'vocab': lambda: self.vocab.to_bytes(),
-            'prefix_search': lambda: self.prefix_search.__self__.pattern,
-            'suffix_search': lambda: self.suffix_search.__self__.pattern,
-            'infix_finditer': lambda: self.infix_finditer.__self__.pattern,
-            'token_match': lambda: self.token_match.__self__.pattern,
-            'exceptions': lambda: self._rules
-        }
+        serializers = OrderedDict((
+            ('vocab', lambda: self.vocab.to_bytes()),
+            ('prefix_search', lambda: self.prefix_search.__self__.pattern),
+            ('suffix_search', lambda: self.suffix_search.__self__.pattern),
+            ('infix_finditer', lambda: self.infix_finditer.__self__.pattern),
+            ('token_match', lambda: self.token_match.__self__.pattern),
+            ('exceptions', lambda: OrderedDict(sorted(self._rules.items())))
+        ))
         return util.to_bytes(serializers, exclude)
 
     def from_bytes(self, bytes_data, **exclude):
@@ -372,15 +373,15 @@ cdef class Tokenizer:
         **exclude: Named attributes to prevent from being loaded.
         RETURNS (Tokenizer): The `Tokenizer` object.
         """
-        data = {}
-        deserializers = {
-            'vocab': lambda b: self.vocab.from_bytes(b),
-            'prefix_search': lambda b: data.setdefault('prefix', b),
-            'suffix_search': lambda b: data.setdefault('suffix_search', b),
-            'infix_finditer': lambda b: data.setdefault('infix_finditer', b),
-            'token_match': lambda b: data.setdefault('token_match', b),
-            'exceptions': lambda b: data.setdefault('rules', b)
-        }
+        data = OrderedDict()  # keys must match the "in data" checks below
+        deserializers = OrderedDict((
+            ('vocab', lambda b: self.vocab.from_bytes(b)),
+            ('prefix_search', lambda b: data.setdefault('prefix_search', b)),
+            ('suffix_search', lambda b: data.setdefault('suffix_search', b)),
+            ('infix_finditer', lambda b: data.setdefault('infix_finditer', b)),
+            ('token_match', lambda b: data.setdefault('token_match', b)),
+            ('exceptions', lambda b: data.setdefault('rules', b))
+        ))
         msg = util.from_bytes(bytes_data, deserializers, exclude)
         if 'prefix_search' in data:
             self.prefix_search = re.compile(data['prefix_search']).search

From 3152ee5ca2f21708e428faac5eaadbb403d0a1dc Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 17:05:28 +0200
Subject: [PATCH 15/20] Update serialization tests for tokenizer

---
 .../serialize/test_serialize_tokenizer.py     | 20 +++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/spacy/tests/serialize/test_serialize_tokenizer.py b/spacy/tests/serialize/test_serialize_tokenizer.py
index 2e3d78c14..e893d3a77 100644
--- a/spacy/tests/serialize/test_serialize_tokenizer.py
+++ b/spacy/tests/serialize/test_serialize_tokenizer.py
@@ -1,17 +1,25 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-from ..util import make_tempdir
+from ...util import get_lang_class
+from ..util import make_tempdir, assert_packed_msg_equal
 
 import pytest
 
 
-@pytest.mark.parametrize('text', ["I can't do this"])
+def load_tokenizer(b):
+    tok = get_lang_class('en').Defaults.create_tokenizer()
+    tok.from_bytes(b)
+    return tok
+
+
+@pytest.mark.parametrize('text', ["I💜you", "they’re", "“hello”"])
 def test_serialize_tokenizer_roundtrip_bytes(en_tokenizer, text):
-    tokenizer_b = en_tokenizer.to_bytes()
-    new_tokenizer = en_tokenizer.from_bytes(tokenizer_b)
-    assert new_tokenizer.to_bytes() == tokenizer_b
-    doc1 = en_tokenizer(text)
+    tokenizer = en_tokenizer
+    new_tokenizer = load_tokenizer(tokenizer.to_bytes())
+    assert_packed_msg_equal(new_tokenizer.to_bytes(), tokenizer.to_bytes())
+    # assert new_tokenizer.to_bytes() == tokenizer.to_bytes()
+    doc1 = tokenizer(text)
     doc2 = new_tokenizer(text)
     assert [token.text for token in doc1] == [token.text for token in doc2]
 

From 05fe6758a71c0e524405d59b005eab0656f41098 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 19:44:39 +0200
Subject: [PATCH 16/20] Set lexeme attributes for tokenizer special cases

---
 spacy/vocab.pyx | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index d3aa426cd..6655925e4 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -231,11 +231,13 @@ cdef class Vocab:
             props = intify_attrs(props, strings_map=self.strings, _do_deprecated=True)
             token = &tokens[i]
             # Set the special tokens up to have arbitrary attributes
-            token.lex = <LexemeC*>self.get_by_orth(self.mem, props[attrs.ORTH])
+            lex = <LexemeC*>self.get_by_orth(self.mem, props[attrs.ORTH])
+            token.lex = lex
             if attrs.TAG in props:
                 self.morphology.assign_tag(token, props[attrs.TAG])
             for attr_id, value in props.items():
                 Token.set_struct_attr(token, attr_id, value)
+                Lexeme.set_struct_attr(lex, attr_id, value)
         return tokens
 
     @property

From 4c2bbc3ccc2c6830846764376a52edb307ef592e Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 19:44:47 +0200
Subject: [PATCH 17/20] Add add_lookups util function

---
 spacy/util.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/spacy/util.py b/spacy/util.py
index 55f2a49bb..469123479 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -299,6 +299,22 @@ def compile_infix_regex(entries):
     return re.compile(expression)
 
 
+def add_lookups(default_func, *lookups):
+    """Extend an attribute function with special cases. The lookups are checked
+    in order; the first one containing the string wins, else default_func runs.
+
+    default_func (callable): Fallback called with the string on a miss.
+    *lookups (dict): Lookup dictionaries mapping string to attribute value.
+    RETURNS (callable): Lexical attribute getter.
+    """
+    def get_attr(string):
+        for lookup in lookups:  # earlier lookups take precedence
+            if string in lookup:
+                return lookup[string]
+        return default_func(string)
+    return get_attr
+
+
 def update_exc(base_exceptions, *addition_dicts):
     """Update and validate tokenizer exceptions. Will overwrite exceptions.
 

From e5d426406ad3661a2863c06339f896da451d9450 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 20:27:05 +0200
Subject: [PATCH 18/20] Add base norm exceptions

---
 spacy/lang/norm_exceptions.py | 46 +++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 spacy/lang/norm_exceptions.py

diff --git a/spacy/lang/norm_exceptions.py b/spacy/lang/norm_exceptions.py
new file mode 100644
index 000000000..b02dda2c8
--- /dev/null
+++ b/spacy/lang/norm_exceptions.py
@@ -0,0 +1,46 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+# These exceptions are used to add NORM values based on a token's ORTH value.
+# Individual languages can also add their own exceptions and overwrite them -
+# for example, British vs. American spelling in English.
+
+# Norms are only set if no alternative is provided in the tokenizer exceptions.
+# Note that this does not change any other token attributes. Its main purpose
+# is to normalise the word representations so that equivalent tokens receive
+# similar representations. For example: $ and € are very different, but they're
+# both currency symbols. By normalising currency symbols to $, all symbols are
+# seen as similar, no matter how common they are in the training data.
+
+
+BASE_NORMS = {
+    "'s": "'s",  # apostrophe-s variants -> lowercase ASCII 's
+    "'S": "'s",
+    "’s": "'s",
+    "’S": "'s",
+    "’": "'",  # single quote / accent characters -> '
+    "‘": "'",
+    "´": "'",
+    "`": "'",
+    "”": '"',  # double quote variants -> "
+    "“": '"',
+    "''": '"',
+    "``": '"',
+    "´´": '"',
+    "„": '"',
+    "»": '"',
+    "«": '"',
+    "…": "...",  # ellipsis character -> three dots
+    "—": "-",  # dashes and hyphen runs -> single hyphen
+    "–": "-",
+    "--": "-",
+    "---": "-",
+    "€": "$",  # currency symbols and dollar variants -> $
+    "£": "$",
+    "¥": "$",
+    "฿": "$",
+    "US$": "$",
+    "C$": "$",
+    "A$": "$"
+}

From 095eeeb12f208fb368b1fcd5eae6a9b99eaa2c8b Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 20:27:16 +0200
Subject: [PATCH 19/20] Update English tokenizer exceptions and add norms

---
 spacy/lang/en/tokenizer_exceptions.py | 366 +++++++++++++-------------
 1 file changed, 187 insertions(+), 179 deletions(-)

diff --git a/spacy/lang/en/tokenizer_exceptions.py b/spacy/lang/en/tokenizer_exceptions.py
index 5c6e3f893..392532619 100644
--- a/spacy/lang/en/tokenizer_exceptions.py
+++ b/spacy/lang/en/tokenizer_exceptions.py
@@ -15,20 +15,20 @@ _exclude = ["Ill", "ill", "Its", "its", "Hell", "hell", "Shell", "shell",
 for pron in ["i"]:
     for orth in [pron, pron.title()]:
         _exc[orth + "'m"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
-            {ORTH: "'m", LEMMA: "be", TAG: "VBP", "tenspect": 1, "number": 1}]
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+            {ORTH: "'m", LEMMA: "be", NORM: "am", TAG: "VBP", "tenspect": 1, "number": 1}]
 
         _exc[orth + "m"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
             {ORTH: "m", LEMMA: "be", TAG: "VBP", "tenspect": 1, "number": 1 }]
 
         _exc[orth + "'ma"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
             {ORTH: "'m", LEMMA: "be", NORM: "am"},
             {ORTH: "a", LEMMA: "going to", NORM: "gonna"}]
 
         _exc[orth + "ma"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
             {ORTH: "m", LEMMA: "be", NORM: "am"},
             {ORTH: "a", LEMMA: "going to", NORM: "gonna"}]
 
@@ -36,72 +36,72 @@ for pron in ["i"]:
 for pron in ["i", "you", "he", "she", "it", "we", "they"]:
     for orth in [pron, pron.title()]:
         _exc[orth + "'ll"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
-            {ORTH: "'ll", LEMMA: "will", TAG: "MD"}]
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+            {ORTH: "'ll", LEMMA: "will", NORM: "will", TAG: "MD"}]
 
         _exc[orth + "ll"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
-            {ORTH: "ll", LEMMA: "will", TAG: "MD"}]
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+            {ORTH: "ll", LEMMA: "will", NORM: "will", TAG: "MD"}]
 
         _exc[orth + "'ll've"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
-            {ORTH: "'ll", LEMMA: "will", TAG: "MD"},
-            {ORTH: "'ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+            {ORTH: "'ll", LEMMA: "will", NORM: "will", TAG: "MD"},
+            {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
         _exc[orth + "llve"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
-            {ORTH: "ll", LEMMA: "will", TAG: "MD"},
-            {ORTH: "ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+            {ORTH: "ll", LEMMA: "will", NORM: "will", TAG: "MD"},
+            {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
         _exc[orth + "'d"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
-            {ORTH: "'d", LEMMA: "would", TAG: "MD"}]
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+            {ORTH: "'d", LEMMA: "would", NORM: "would", TAG: "MD"}]
 
         _exc[orth + "d"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
-            {ORTH: "d", LEMMA: "would", TAG: "MD"}]
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+            {ORTH: "d", LEMMA: "would", NORM: "would", TAG: "MD"}]
 
         _exc[orth + "'d've"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
-            {ORTH: "'d", LEMMA: "would", TAG: "MD"},
-            {ORTH: "'ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+            {ORTH: "'d", LEMMA: "would", NORM: "would", TAG: "MD"},
+            {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
         _exc[orth + "dve"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
-            {ORTH: "d", LEMMA: "would", TAG: "MD"},
-            {ORTH: "ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+            {ORTH: "d", LEMMA: "would", NORM: "would", TAG: "MD"},
+            {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
 
 for pron in ["i", "you", "we", "they"]:
     for orth in [pron, pron.title()]:
         _exc[orth + "'ve"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
-            {ORTH: "'ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+            {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
         _exc[orth + "ve"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
-            {ORTH: "ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+            {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
 
 for pron in ["you", "we", "they"]:
     for orth in [pron, pron.title()]:
         _exc[orth + "'re"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
             {ORTH: "'re", LEMMA: "be", NORM: "are"}]
 
         _exc[orth + "re"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
             {ORTH: "re", LEMMA: "be", NORM: "are", TAG: "VBZ"}]
 
 
 for pron in ["he", "she", "it"]:
     for orth in [pron, pron.title()]:
         _exc[orth + "'s"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
-            {ORTH: "'s"}]
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
+            {ORTH: "'s", NORM: "'s"}]
 
         _exc[orth + "s"] = [
-            {ORTH: orth, LEMMA: PRON_LEMMA, TAG: "PRP"},
+            {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
             {ORTH: "s"}]
 
 
@@ -110,111 +110,111 @@ for pron in ["he", "she", "it"]:
 for word in ["who", "what", "when", "where", "why", "how", "there", "that"]:
     for orth in [word, word.title()]:
         _exc[orth + "'s"] = [
-            {ORTH: orth, LEMMA: word},
-            {ORTH: "'s"}]
+            {ORTH: orth, LEMMA: word, NORM: word},
+            {ORTH: "'s", NORM: "'s"}]
 
         _exc[orth + "s"] = [
-            {ORTH: orth, LEMMA: word},
+            {ORTH: orth, LEMMA: word, NORM: word},
             {ORTH: "s"}]
 
         _exc[orth + "'ll"] = [
-            {ORTH: orth, LEMMA: word},
-            {ORTH: "'ll", LEMMA: "will", TAG: "MD"}]
+            {ORTH: orth, LEMMA: word, NORM: word},
+            {ORTH: "'ll", LEMMA: "will", NORM: "will", TAG: "MD"}]
 
         _exc[orth + "ll"] = [
-            {ORTH: orth, LEMMA: word},
-            {ORTH: "ll", LEMMA: "will", TAG: "MD"}]
+            {ORTH: orth, LEMMA: word, NORM: word},
+            {ORTH: "ll", LEMMA: "will", NORM: "will", TAG: "MD"}]
 
         _exc[orth + "'ll've"] = [
-            {ORTH: orth, LEMMA: word},
-            {ORTH: "'ll", LEMMA: "will", TAG: "MD"},
-            {ORTH: "'ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: orth, LEMMA: word, NORM: word},
+            {ORTH: "'ll", LEMMA: "will", NORM: "will", TAG: "MD"},
+            {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
         _exc[orth + "llve"] = [
-            {ORTH: orth, LEMMA: word},
-            {ORTH: "ll", LEMMA: "will", TAG: "MD"},
-            {ORTH: "ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: orth, LEMMA: word, NORM: word},
+            {ORTH: "ll", LEMMA: "will", NORM: "will", TAG: "MD"},
+            {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
         _exc[orth + "'re"] = [
-            {ORTH: orth, LEMMA: word},
+            {ORTH: orth, LEMMA: word, NORM: word},
             {ORTH: "'re", LEMMA: "be", NORM: "are"}]
 
         _exc[orth + "re"] = [
-            {ORTH: orth, LEMMA: word},
+            {ORTH: orth, LEMMA: word, NORM: word},
             {ORTH: "re", LEMMA: "be", NORM: "are"}]
 
         _exc[orth + "'ve"] = [
-            {ORTH: orth},
+            {ORTH: orth, LEMMA: word, NORM: word},
             {ORTH: "'ve", LEMMA: "have", TAG: "VB"}]
 
         _exc[orth + "ve"] = [
             {ORTH: orth, LEMMA: word},
-            {ORTH: "ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
         _exc[orth + "'d"] = [
-            {ORTH: orth, LEMMA: word},
-            {ORTH: "'d"}]
+            {ORTH: orth, LEMMA: word, NORM: word},
+            {ORTH: "'d", NORM: "'d"}]
 
         _exc[orth + "d"] = [
-            {ORTH: orth, LEMMA: word},
+            {ORTH: orth, LEMMA: word, NORM: word},
             {ORTH: "d"}]
 
         _exc[orth + "'d've"] = [
-            {ORTH: orth, LEMMA: word},
-            {ORTH: "'d", LEMMA: "would", TAG: "MD"},
-            {ORTH: "'ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: orth, LEMMA: word, NORM: word},
+            {ORTH: "'d", LEMMA: "would", NORM: "would", TAG: "MD"},
+            {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
         _exc[orth + "dve"] = [
-            {ORTH: orth, LEMMA: word},
-            {ORTH: "d", LEMMA: "would", TAG: "MD"},
-            {ORTH: "ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: orth, LEMMA: word, NORM: word},
+            {ORTH: "d", LEMMA: "would", NORM: "would", TAG: "MD"},
+            {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
 
 # Verbs
 
 for verb_data in [
-    {ORTH: "ca", LEMMA: "can", TAG: "MD"},
-    {ORTH: "could", TAG: "MD"},
-    {ORTH: "do", LEMMA: "do"},
-    {ORTH: "does", LEMMA: "do"},
-    {ORTH: "did", LEMMA: "do", TAG: "VBD"},
-    {ORTH: "had", LEMMA: "have", TAG: "VBD"},
-    {ORTH: "may", TAG: "MD"},
-    {ORTH: "might", TAG: "MD"},
-    {ORTH: "must", TAG: "MD"},
-    {ORTH: "need"},
-    {ORTH: "ought"},
-    {ORTH: "sha", LEMMA: "shall", TAG: "MD"},
-    {ORTH: "should", TAG: "MD"},
-    {ORTH: "wo", LEMMA: "will", TAG: "MD"},
-    {ORTH: "would", TAG: "MD"}]:
+    {ORTH: "ca", LEMMA: "can", NORM: "can", TAG: "MD"},
+    {ORTH: "could", NORM: "could", TAG: "MD"},
+    {ORTH: "do", LEMMA: "do", NORM: "do"},
+    {ORTH: "does", LEMMA: "do", NORM: "does"},
+    {ORTH: "did", LEMMA: "do", NORM: "did", TAG: "VBD"},
+    {ORTH: "had", LEMMA: "have", NORM: "had", TAG: "VBD"},
+    {ORTH: "may", NORM: "may", TAG: "MD"},
+    {ORTH: "might", NORM: "might", TAG: "MD"},
+    {ORTH: "must", NORM: "must", TAG: "MD"},
+    {ORTH: "need", NORM: "need"},
+    {ORTH: "ought", NORM: "ought", TAG: "MD"},
+    {ORTH: "sha", LEMMA: "shall", NORM: "shall", TAG: "MD"},
+    {ORTH: "should", NORM: "should", TAG: "MD"},
+    {ORTH: "wo", LEMMA: "will", NORM: "will", TAG: "MD"},
+    {ORTH: "would", NORM: "would", TAG: "MD"}]:
     verb_data_tc = dict(verb_data)
     verb_data_tc[ORTH] = verb_data_tc[ORTH].title()
     for data in [verb_data, verb_data_tc]:
         _exc[data[ORTH] + "n't"] = [
             dict(data),
-            {ORTH: "n't", LEMMA: "not", TAG: "RB"}]
+            {ORTH: "n't", LEMMA: "not", NORM: "not", TAG: "RB"}]
 
         _exc[data[ORTH] + "nt"] = [
             dict(data),
-            {ORTH: "nt", LEMMA: "not", TAG: "RB"}]
+            {ORTH: "nt", LEMMA: "not", NORM: "not", TAG: "RB"}]
 
         _exc[data[ORTH] + "n't've"] = [
             dict(data),
-            {ORTH: "n't", LEMMA: "not", TAG: "RB"},
-            {ORTH: "'ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: "n't", LEMMA: "not", NORM: "not", TAG: "RB"},
+            {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
         _exc[data[ORTH] + "ntve"] = [
             dict(data),
-            {ORTH: "nt", LEMMA: "not", TAG: "RB"},
-            {ORTH: "ve", LEMMA: "have", TAG: "VB"}]
+            {ORTH: "nt", LEMMA: "not", NORM: "not", TAG: "RB"},
+            {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}]
 
 
 for verb_data in [
-    {ORTH: "could", TAG: "MD"},
-    {ORTH: "might"},
-    {ORTH: "must"},
-    {ORTH: "should"}]:
+    {ORTH: "could", NORM: "could", TAG: "MD"},
+    {ORTH: "might", NORM: "might", TAG: "MD"},
+    {ORTH: "must", NORM: "must", TAG: "MD"},
+    {ORTH: "should", NORM: "should", TAG: "MD"}]:
     verb_data_tc = dict(verb_data)
     verb_data_tc[ORTH] = verb_data_tc[ORTH].title()
     for data in [verb_data, verb_data_tc]:
@@ -228,21 +228,21 @@ for verb_data in [
 
 
 for verb_data in [
-    {ORTH: "ai", TAG: "VBP", "number": 2, LEMMA: "be"},
-    {ORTH: "are", LEMMA: "be", TAG: "VBP", "number": 2},
-    {ORTH: "is", LEMMA: "be", TAG: "VBZ"},
-    {ORTH: "was", LEMMA: "be"},
-    {ORTH: "were", LEMMA: "be"}]:
+    {ORTH: "ai", LEMMA: "be", TAG: "VBP", "number": 2},
+    {ORTH: "are", LEMMA: "be", NORM: "are", TAG: "VBP", "number": 2},
+    {ORTH: "is", LEMMA: "be", NORM: "is", TAG: "VBZ"},
+    {ORTH: "was", LEMMA: "be", NORM: "was"},
+    {ORTH: "were", LEMMA: "be", NORM: "were"}]:
     verb_data_tc = dict(verb_data)
     verb_data_tc[ORTH] = verb_data_tc[ORTH].title()
     for data in [verb_data, verb_data_tc]:
         _exc[data[ORTH] + "n't"] = [
             dict(data),
-            {ORTH: "n't", LEMMA: "not", TAG: "RB"}]
+            {ORTH: "n't", LEMMA: "not", NORM: "not", TAG: "RB"}]
 
         _exc[data[ORTH] + "nt"] = [
             dict(data),
-            {ORTH: "nt", LEMMA: "not", TAG: "RB"}]
+            {ORTH: "nt", LEMMA: "not", NORM: "not", TAG: "RB"}]
 
 
 # Other contractions with trailing apostrophe
@@ -250,10 +250,10 @@ for verb_data in [
 for exc_data in [
     {ORTH: "doin", LEMMA: "do", NORM: "doing"},
     {ORTH: "goin", LEMMA: "go", NORM: "going"},
-    {ORTH: "nothin", LEMMA: "nothing"},
-    {ORTH: "nuthin", LEMMA: "nothing"},
-    {ORTH: "ol", LEMMA: "old"},
-    {ORTH: "somethin", LEMMA: "something"}]:
+    {ORTH: "nothin", LEMMA: "nothing", NORM: "nothing"},
+    {ORTH: "nuthin", LEMMA: "nothing", NORM: "nothing"},
+    {ORTH: "ol", LEMMA: "old", NORM: "old"},
+    {ORTH: "somethin", LEMMA: "something", NORM: "something"}]:
     exc_data_tc = dict(exc_data)
     exc_data_tc[ORTH] = exc_data_tc[ORTH].title()
     for data in [exc_data, exc_data_tc]:
@@ -266,10 +266,10 @@ for exc_data in [
 # Other contractions with leading apostrophe
 
 for exc_data in [
-    {ORTH: "cause", LEMMA: "because"},
+    {ORTH: "cause", LEMMA: "because", NORM: "because"},
     {ORTH: "em", LEMMA: PRON_LEMMA, NORM: "them"},
-    {ORTH: "ll", LEMMA: "will"},
-    {ORTH: "nuff", LEMMA: "enough"}]:
+    {ORTH: "ll", LEMMA: "will", NORM: "will"},
+    {ORTH: "nuff", LEMMA: "enough", NORM: "enough"}]:
     exc_data_apos = dict(exc_data)
     exc_data_apos[ORTH] = "'" + exc_data_apos[ORTH]
     for data in [exc_data, exc_data_apos]:
@@ -282,11 +282,11 @@ for h in range(1, 12 + 1):
     for period in ["a.m.", "am"]:
         _exc["%d%s" % (h, period)] = [
             {ORTH: "%d" % h},
-            {ORTH: period, LEMMA: "a.m."}]
+            {ORTH: period, LEMMA: "a.m.", NORM: "a.m."}]
     for period in ["p.m.", "pm"]:
         _exc["%d%s" % (h, period)] = [
             {ORTH: "%d" % h},
-            {ORTH: period, LEMMA: "p.m."}]
+            {ORTH: period, LEMMA: "p.m.", NORM: "p.m."}]
 
 
 # Rest
@@ -306,56 +306,56 @@ _other_exc = {
         {ORTH: "'y", LEMMA: PRON_LEMMA, NORM: "you"}],
 
     "How'd'y": [
-        {ORTH: "How", LEMMA: "how"},
+        {ORTH: "How", LEMMA: "how", NORM: "how"},
         {ORTH: "'d", LEMMA: "do"},
         {ORTH: "'y", LEMMA: PRON_LEMMA, NORM: "you"}],
 
     "not've": [
         {ORTH: "not", LEMMA: "not", TAG: "RB"},
-        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}],
+        {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}],
 
     "notve": [
         {ORTH: "not", LEMMA: "not", TAG: "RB"},
-        {ORTH: "ve", LEMMA: "have", TAG: "VB"}],
+        {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}],
 
     "Not've": [
-        {ORTH: "Not", LEMMA: "not", TAG: "RB"},
-        {ORTH: "'ve", LEMMA: "have", TAG: "VB"}],
+        {ORTH: "Not", LEMMA: "not", NORM: "not", TAG: "RB"},
+        {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}],
 
     "Notve": [
-        {ORTH: "Not", LEMMA: "not", TAG: "RB"},
-        {ORTH: "ve", LEMMA: "have", TAG: "VB"}],
+        {ORTH: "Not", LEMMA: "not", NORM: "not", TAG: "RB"},
+        {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}],
 
     "cannot": [
         {ORTH: "can", LEMMA: "can", TAG: "MD"},
         {ORTH: "not", LEMMA: "not", TAG: "RB"}],
 
     "Cannot": [
-        {ORTH: "Can", LEMMA: "can", TAG: "MD"},
+        {ORTH: "Can", LEMMA: "can", NORM: "can", TAG: "MD"},
         {ORTH: "not", LEMMA: "not", TAG: "RB"}],
 
     "gonna": [
         {ORTH: "gon", LEMMA: "go", NORM: "going"},
-        {ORTH: "na", LEMMA: "to"}],
+        {ORTH: "na", LEMMA: "to", NORM: "to"}],
 
     "Gonna": [
         {ORTH: "Gon", LEMMA: "go", NORM: "going"},
-        {ORTH: "na", LEMMA: "to"}],
+        {ORTH: "na", LEMMA: "to", NORM: "to"}],
 
     "gotta": [
         {ORTH: "got"},
-        {ORTH: "ta", LEMMA: "to"}],
+        {ORTH: "ta", LEMMA: "to", NORM: "to"}],
 
     "Gotta": [
-        {ORTH: "Got"},
-        {ORTH: "ta", LEMMA: "to"}],
+        {ORTH: "Got", NORM: "got"},
+        {ORTH: "ta", LEMMA: "to", NORM: "to"}],
 
     "let's": [
         {ORTH: "let"},
         {ORTH: "'s", LEMMA: PRON_LEMMA, NORM: "us"}],
 
     "Let's": [
-        {ORTH: "Let", LEMMA: "let"},
+        {ORTH: "Let", LEMMA: "let", NORM: "let"},
         {ORTH: "'s", LEMMA: PRON_LEMMA, NORM: "us"}]
 }
 
@@ -363,72 +363,80 @@ _exc.update(_other_exc)
 
 
 for exc_data in [
-    {ORTH: "'S", LEMMA: "'s"},
-    {ORTH: "'s", LEMMA: "'s"},
-    {ORTH: "\u2018S", LEMMA: "'s"},
-    {ORTH: "\u2018s", LEMMA: "'s"},
-    {ORTH: "and/or", LEMMA: "and/or", TAG: "CC"},
+    {ORTH: "'S", LEMMA: "'s", NORM: "'s"},
+    {ORTH: "'s", LEMMA: "'s", NORM: "'s"},
+    {ORTH: "\u2018S", LEMMA: "'s", NORM: "'s"},
+    {ORTH: "\u2018s", LEMMA: "'s", NORM: "'s"},
+    {ORTH: "and/or", LEMMA: "and/or", NORM: "and/or", TAG: "CC"},
+    {ORTH: "w/o", LEMMA: "without", NORM: "without"},
     {ORTH: "'re", LEMMA: "be", NORM: "are"},
-    {ORTH: "'Cause", LEMMA: "because"},
-    {ORTH: "'cause", LEMMA: "because"},
-    {ORTH: "ma'am", LEMMA: "madam"},
-    {ORTH: "Ma'am", LEMMA: "madam"},
-    {ORTH: "o'clock", LEMMA: "o'clock"},
-    {ORTH: "O'clock", LEMMA: "o'clock"},
+    {ORTH: "'Cause", LEMMA: "because", NORM: "because"},
+    {ORTH: "'cause", LEMMA: "because", NORM: "because"},
+    {ORTH: "'cos", LEMMA: "because", NORM: "because"},
+    {ORTH: "'Cos", LEMMA: "because", NORM: "because"},
+    {ORTH: "'coz", LEMMA: "because", NORM: "because"},
+    {ORTH: "'Coz", LEMMA: "because", NORM: "because"},
+    {ORTH: "'cuz", LEMMA: "because", NORM: "because"},
+    {ORTH: "'Cuz", LEMMA: "because", NORM: "because"},
+    {ORTH: "'bout", LEMMA: "about", NORM: "about"},
+    {ORTH: "ma'am", LEMMA: "madam", NORM: "madam"},
+    {ORTH: "Ma'am", LEMMA: "madam", NORM: "madam"},
+    {ORTH: "o'clock", LEMMA: "o'clock", NORM: "o'clock"},
+    {ORTH: "O'clock", LEMMA: "o'clock", NORM: "o'clock"},
 
-    {ORTH: "Mt.", LEMMA: "Mount"},
-    {ORTH: "Ak.", LEMMA: "Alaska"},
-    {ORTH: "Ala.", LEMMA: "Alabama"},
-    {ORTH: "Apr.", LEMMA: "April"},
-    {ORTH: "Ariz.", LEMMA: "Arizona"},
-    {ORTH: "Ark.", LEMMA: "Arkansas"},
-    {ORTH: "Aug.", LEMMA: "August"},
-    {ORTH: "Calif.", LEMMA: "California"},
-    {ORTH: "Colo.", LEMMA: "Colorado"},
-    {ORTH: "Conn.", LEMMA: "Connecticut"},
-    {ORTH: "Dec.", LEMMA: "December"},
-    {ORTH: "Del.", LEMMA: "Delaware"},
-    {ORTH: "Feb.", LEMMA: "February"},
-    {ORTH: "Fla.", LEMMA: "Florida"},
-    {ORTH: "Ga.", LEMMA: "Georgia"},
-    {ORTH: "Ia.", LEMMA: "Iowa"},
-    {ORTH: "Id.", LEMMA: "Idaho"},
-    {ORTH: "Ill.", LEMMA: "Illinois"},
-    {ORTH: "Ind.", LEMMA: "Indiana"},
-    {ORTH: "Jan.", LEMMA: "January"},
-    {ORTH: "Jul.", LEMMA: "July"},
-    {ORTH: "Jun.", LEMMA: "June"},
-    {ORTH: "Kan.", LEMMA: "Kansas"},
-    {ORTH: "Kans.", LEMMA: "Kansas"},
-    {ORTH: "Ky.", LEMMA: "Kentucky"},
-    {ORTH: "La.", LEMMA: "Louisiana"},
-    {ORTH: "Mar.", LEMMA: "March"},
-    {ORTH: "Mass.", LEMMA: "Massachusetts"},
-    {ORTH: "May.", LEMMA: "May"},
-    {ORTH: "Mich.", LEMMA: "Michigan"},
-    {ORTH: "Minn.", LEMMA: "Minnesota"},
-    {ORTH: "Miss.", LEMMA: "Mississippi"},
-    {ORTH: "N.C.", LEMMA: "North Carolina"},
-    {ORTH: "N.D.", LEMMA: "North Dakota"},
-    {ORTH: "N.H.", LEMMA: "New Hampshire"},
-    {ORTH: "N.J.", LEMMA: "New Jersey"},
-    {ORTH: "N.M.", LEMMA: "New Mexico"},
-    {ORTH: "N.Y.", LEMMA: "New York"},
-    {ORTH: "Neb.", LEMMA: "Nebraska"},
-    {ORTH: "Nebr.", LEMMA: "Nebraska"},
-    {ORTH: "Nev.", LEMMA: "Nevada"},
-    {ORTH: "Nov.", LEMMA: "November"},
-    {ORTH: "Oct.", LEMMA: "October"},
-    {ORTH: "Okla.", LEMMA: "Oklahoma"},
-    {ORTH: "Ore.", LEMMA: "Oregon"},
-    {ORTH: "Pa.", LEMMA: "Pennsylvania"},
-    {ORTH: "S.C.", LEMMA: "South Carolina"},
-    {ORTH: "Sep.", LEMMA: "September"},
-    {ORTH: "Sept.", LEMMA: "September"},
-    {ORTH: "Tenn.", LEMMA: "Tennessee"},
-    {ORTH: "Va.", LEMMA: "Virginia"},
-    {ORTH: "Wash.", LEMMA: "Washington"},
-    {ORTH: "Wis.", LEMMA: "Wisconsin"}]:
+    {ORTH: "Mt.", LEMMA: "Mount", NORM: "Mount"},
+    {ORTH: "Ak.", LEMMA: "Alaska", NORM: "Alaska"},
+    {ORTH: "Ala.", LEMMA: "Alabama", NORM: "Alabama"},
+    {ORTH: "Apr.", LEMMA: "April", NORM: "April"},
+    {ORTH: "Ariz.", LEMMA: "Arizona", NORM: "Arizona"},
+    {ORTH: "Ark.", LEMMA: "Arkansas", NORM: "Arkansas"},
+    {ORTH: "Aug.", LEMMA: "August", NORM: "August"},
+    {ORTH: "Calif.", LEMMA: "California", NORM: "California"},
+    {ORTH: "Colo.", LEMMA: "Colorado", NORM: "Colorado"},
+    {ORTH: "Conn.", LEMMA: "Connecticut", NORM: "Connecticut"},
+    {ORTH: "Dec.", LEMMA: "December", NORM: "December"},
+    {ORTH: "Del.", LEMMA: "Delaware", NORM: "Delaware"},
+    {ORTH: "Feb.", LEMMA: "February", NORM: "February"},
+    {ORTH: "Fla.", LEMMA: "Florida", NORM: "Florida"},
+    {ORTH: "Ga.", LEMMA: "Georgia", NORM: "Georgia"},
+    {ORTH: "Ia.", LEMMA: "Iowa", NORM: "Iowa"},
+    {ORTH: "Id.", LEMMA: "Idaho", NORM: "Idaho"},
+    {ORTH: "Ill.", LEMMA: "Illinois", NORM: "Illinois"},
+    {ORTH: "Ind.", LEMMA: "Indiana", NORM: "Indiana"},
+    {ORTH: "Jan.", LEMMA: "January", NORM: "January"},
+    {ORTH: "Jul.", LEMMA: "July", NORM: "July"},
+    {ORTH: "Jun.", LEMMA: "June", NORM: "June"},
+    {ORTH: "Kan.", LEMMA: "Kansas", NORM: "Kansas"},
+    {ORTH: "Kans.", LEMMA: "Kansas", NORM: "Kansas"},
+    {ORTH: "Ky.", LEMMA: "Kentucky", NORM: "Kentucky"},
+    {ORTH: "La.", LEMMA: "Louisiana", NORM: "Louisiana"},
+    {ORTH: "Mar.", LEMMA: "March", NORM: "March"},
+    {ORTH: "Mass.", LEMMA: "Massachusetts", NORM: "Massachusetts"},
+    {ORTH: "May.", LEMMA: "May", NORM: "May"},
+    {ORTH: "Mich.", LEMMA: "Michigan", NORM: "Michigan"},
+    {ORTH: "Minn.", LEMMA: "Minnesota", NORM: "Minnesota"},
+    {ORTH: "Miss.", LEMMA: "Mississippi", NORM: "Mississippi"},
+    {ORTH: "N.C.", LEMMA: "North Carolina", NORM: "North Carolina"},
+    {ORTH: "N.D.", LEMMA: "North Dakota", NORM: "North Dakota"},
+    {ORTH: "N.H.", LEMMA: "New Hampshire", NORM: "New Hampshire"},
+    {ORTH: "N.J.", LEMMA: "New Jersey", NORM: "New Jersey"},
+    {ORTH: "N.M.", LEMMA: "New Mexico", NORM: "New Mexico"},
+    {ORTH: "N.Y.", LEMMA: "New York", NORM: "New York"},
+    {ORTH: "Neb.", LEMMA: "Nebraska", NORM: "Nebraska"},
+    {ORTH: "Nebr.", LEMMA: "Nebraska", NORM: "Nebraska"},
+    {ORTH: "Nev.", LEMMA: "Nevada", NORM: "Nevada"},
+    {ORTH: "Nov.", LEMMA: "November", NORM: "November"},
+    {ORTH: "Oct.", LEMMA: "October", NORM: "October"},
+    {ORTH: "Okla.", LEMMA: "Oklahoma", NORM: "Oklahoma"},
+    {ORTH: "Ore.", LEMMA: "Oregon", NORM: "Oregon"},
+    {ORTH: "Pa.", LEMMA: "Pennsylvania", NORM: "Pennsylvania"},
+    {ORTH: "S.C.", LEMMA: "South Carolina", NORM: "South Carolina"},
+    {ORTH: "Sep.", LEMMA: "September", NORM: "September"},
+    {ORTH: "Sept.", LEMMA: "September", NORM: "September"},
+    {ORTH: "Tenn.", LEMMA: "Tennessee", NORM: "Tennessee"},
+    {ORTH: "Va.", LEMMA: "Virginia", NORM: "Virginia"},
+    {ORTH: "Wash.", LEMMA: "Washington", NORM: "Washington"},
+    {ORTH: "Wis.", LEMMA: "Wisconsin", NORM: "Wisconsin"}]:
     _exc[exc_data[ORTH]] = [dict(exc_data)]
 
 

From 746653880ce2fd24a511ae03f7d5f0eaa4d861ca Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Sat, 3 Jun 2017 20:27:28 +0200
Subject: [PATCH 20/20] Add English norm exceptions to lex_attrs

---
 spacy/lang/en/__init__.py        |    8 +-
 spacy/lang/en/norm_exceptions.py | 1761 ++++++++++++++++++++++++++++++
 2 files changed, 1767 insertions(+), 2 deletions(-)
 create mode 100644 spacy/lang/en/norm_exceptions.py

diff --git a/spacy/lang/en/__init__.py b/spacy/lang/en/__init__.py
index 7e1da789b..3f422b834 100644
--- a/spacy/lang/en/__init__.py
+++ b/spacy/lang/en/__init__.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from .norm_exceptions import NORM_EXCEPTIONS
 from .tag_map import TAG_MAP
 from .stop_words import STOP_WORDS
 from .lex_attrs import LEX_ATTRS
@@ -10,14 +11,17 @@ from .lemmatizer import LEMMA_RULES, LEMMA_INDEX, LEMMA_EXC
 from .syntax_iterators import SYNTAX_ITERATORS
 
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ..norm_exceptions import BASE_NORMS
 from ...language import Language
-from ...attrs import LANG
-from ...util import update_exc
+from ...attrs import LANG, NORM
+from ...util import update_exc, add_lookups
 
 
 class EnglishDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
     lex_attr_getters[LANG] = lambda text: 'en'
+    lex_attr_getters[NORM] = add_lookups(Language.Defaults.lex_attr_getters[NORM],
+                                         BASE_NORMS, NORM_EXCEPTIONS)
     lex_attr_getters.update(LEX_ATTRS)
 
     tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
diff --git a/spacy/lang/en/norm_exceptions.py b/spacy/lang/en/norm_exceptions.py
new file mode 100644
index 000000000..ec106b960
--- /dev/null
+++ b/spacy/lang/en/norm_exceptions.py
@@ -0,0 +1,1761 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+_exc = {
+    # Slang and abbreviations
+    "cos": "because",
+    "cuz": "because",
+    "fav": "favorite",
+    "fave": "favorite",
+    "misc": "miscellaneous",
+    "plz": "please",
+    "pls": "please",
+    "thx": "thanks",
+
+    # US vs. UK spelling
+    "accessorise": "accessorize",
+    "accessorised": "accessorized",
+    "accessorises": "accessorizes",
+    "accessorising": "accessorizing",
+    "acclimatisation": "acclimatization",
+    "acclimatise": "acclimatize",
+    "acclimatised": "acclimatized",
+    "acclimatises": "acclimatizes",
+    "acclimatising": "acclimatizing",
+    "accoutrements": "accouterments",
+    "aeon": "eon",
+    "aeons": "eons",
+    "aerogramme": "aerogram",
+    "aerogrammes": "aerograms",
+    "aeroplane": "airplane",
+    "aeroplanes": "airplanes",
+    "aesthete": "esthete",
+    "aesthetes": "esthetes",
+    "aesthetic": "esthetic",
+    "aesthetically": "esthetically",
+    "aesthetics": "esthetics",
+    "aetiology": "etiology",
+    "ageing": "aging",
+    "aggrandisement": "aggrandizement",
+    "agonise": "agonize",
+    "agonised": "agonized",
+    "agonises": "agonizes",
+    "agonising": "agonizing",
+    "agonisingly": "agonizingly",
+    "almanack": "almanac",
+    "almanacks": "almanacs",
+    "aluminium": "aluminum",
+    "amortisable": "amortizable",
+    "amortisation": "amortization",
+    "amortisations": "amortizations",
+    "amortise": "amortize",
+    "amortised": "amortized",
+    "amortises": "amortizes",
+    "amortising": "amortizing",
+    "amphitheatre": "amphitheater",
+    "amphitheatres": "amphitheaters",
+    "anaemia": "anemia",
+    "anaemic": "anemic",
+    "anaesthesia": "anesthesia",
+    "anaesthetic": "anesthetic",
+    "anaesthetics": "anesthetics",
+    "anaesthetise": "anesthetize",
+    "anaesthetised": "anesthetized",
+    "anaesthetises": "anesthetizes",
+    "anaesthetising": "anesthetizing",
+    "anaesthetist": "anesthetist",
+    "anaesthetists": "anesthetists",
+    "anaesthetize": "anesthetize",
+    "anaesthetized": "anesthetized",
+    "anaesthetizes": "anesthetizes",
+    "anaesthetizing": "anesthetizing",
+    "analogue": "analog",
+    "analogues": "analogs",
+    "analyse": "analyze",
+    "analysed": "analyzed",
+    "analyses": "analyzes",
+    "analysing": "analyzing",
+    "anglicise": "anglicize",
+    "anglicised": "anglicized",
+    "anglicises": "anglicizes",
+    "anglicising": "anglicizing",
+    "annualised": "annualized",
+    "antagonise": "antagonize",
+    "antagonised": "antagonized",
+    "antagonises": "antagonizes",
+    "antagonising": "antagonizing",
+    "apologise": "apologize",
+    "apologised": "apologized",
+    "apologises": "apologizes",
+    "apologising": "apologizing",
+    "appal": "appall",
+    "appals": "appalls",
+    "appetiser": "appetizer",
+    "appetisers": "appetizers",
+    "appetising": "appetizing",
+    "appetisingly": "appetizingly",
+    "arbour": "arbor",
+    "arbours": "arbors",
+    "archaeological": "archeological",
+    "archaeologically": "archeologically",
+    "archaeologist": "archeologist",
+    "archaeologists": "archeologists",
+    "archaeology": "archeology",
+    "ardour": "ardor",
+    "armour": "armor",
+    "armoured": "armored",
+    "armourer": "armorer",
+    "armourers": "armorers",
+    "armouries": "armories",
+    "armoury": "armory",
+    "artefact": "artifact",
+    "artefacts": "artifacts",
+    "authorise": "authorize",
+    "authorised": "authorized",
+    "authorises": "authorizes",
+    "authorising": "authorizing",
+    "axe": "ax",
+    "backpedalled": "backpedaled",
+    "backpedalling": "backpedaling",
+    "bannister": "banister",
+    "bannisters": "banisters",
+    "baptise": "baptize",
+    "baptised": "baptized",
+    "baptises": "baptizes",
+    "baptising": "baptizing",
+    "bastardise": "bastardize",
+    "bastardised": "bastardized",
+    "bastardises": "bastardizes",
+    "bastardising": "bastardizing",
+    "battleaxe": "battleax",
+    "baulk": "balk",
+    "baulked": "balked",
+    "baulking": "balking",
+    "baulks": "balks",
+    "bedevilled": "bedeviled",
+    "bedevilling": "bedeviling",
+    "behaviour": "behavior",
+    "behavioural": "behavioral",
+    "behaviourism": "behaviorism",
+    "behaviourist": "behaviorist",
+    "behaviourists": "behaviorists",
+    "behaviours": "behaviors",
+    "behove": "behoove",
+    "behoved": "behooved",
+    "behoves": "behooves",
+    "bejewelled": "bejeweled",
+    "belabour": "belabor",
+    "belaboured": "belabored",
+    "belabouring": "belaboring",
+    "belabours": "belabors",
+    "bevelled": "beveled",
+    "bevvies": "bevies",
+    "bevvy": "bevy",
+    "biassed": "biased",
+    "biassing": "biasing",
+    "bingeing": "binging",
+    "bougainvillaea": "bougainvillea",
+    "bougainvillaeas": "bougainvilleas",
+    "bowdlerise": "bowdlerize",
+    "bowdlerised": "bowdlerized",
+    "bowdlerises": "bowdlerizes",
+    "bowdlerising": "bowdlerizing",
+    "breathalyse": "breathalyze",
+    "breathalysed": "breathalyzed",
+    "breathalyser": "breathalyzer",
+    "breathalysers": "breathalyzers",
+    "breathalyses": "breathalyzes",
+    "breathalysing": "breathalyzing",
+    "brutalise": "brutalize",
+    "brutalised": "brutalized",
+    "brutalises": "brutalizes",
+    "brutalising": "brutalizing",
+    "buses": "busses",
+    "busing": "bussing",
+    "caesarean": "cesarean",
+    "caesareans": "cesareans",
+    "calibre": "caliber",
+    "calibres": "calibers",
+    "calliper": "caliper",
+    "callipers": "calipers",
+    "callisthenics": "calisthenics",
+    "canalise": "canalize",
+    "canalised": "canalized",
+    "canalises": "canalizes",
+    "canalising": "canalizing",
+    "cancellation": "cancelation",
+    "cancellations": "cancelations",
+    "cancelled": "canceled",
+    "cancelling": "canceling",
+    "candour": "candor",
+    "cannibalise": "cannibalize",
+    "cannibalised": "cannibalized",
+    "cannibalises": "cannibalizes",
+    "cannibalising": "cannibalizing",
+    "canonise": "canonize",
+    "canonised": "canonized",
+    "canonises": "canonizes",
+    "canonising": "canonizing",
+    "capitalise": "capitalize",
+    "capitalised": "capitalized",
+    "capitalises": "capitalizes",
+    "capitalising": "capitalizing",
+    "caramelise": "caramelize",
+    "caramelised": "caramelized",
+    "caramelises": "caramelizes",
+    "caramelising": "caramelizing",
+    "carbonise": "carbonize",
+    "carbonised": "carbonized",
+    "carbonises": "carbonizes",
+    "carbonising": "carbonizing",
+    "carolled": "caroled",
+    "carolling": "caroling",
+    "catalogue": "catalog",
+    "catalogued": "cataloged",
+    "catalogues": "catalogs",
+    "cataloguing": "cataloging",
+    "catalyse": "catalyze",
+    "catalysed": "catalyzed",
+    "catalyses": "catalyzes",
+    "catalysing": "catalyzing",
+    "categorise": "categorize",
+    "categorised": "categorized",
+    "categorises": "categorizes",
+    "categorising": "categorizing",
+    "cauterise": "cauterize",
+    "cauterised": "cauterized",
+    "cauterises": "cauterizes",
+    "cauterising": "cauterizing",
+    "cavilled": "caviled",
+    "cavilling": "caviling",
+    "centigramme": "centigram",
+    "centigrammes": "centigrams",
+    "centilitre": "centiliter",
+    "centilitres": "centiliters",
+    "centimetre": "centimeter",
+    "centimetres": "centimeters",
+    "centralise": "centralize",
+    "centralised": "centralized",
+    "centralises": "centralizes",
+    "centralising": "centralizing",
+    "centre": "center",
+    "centred": "centered",
+    "centrefold": "centerfold",
+    "centrefolds": "centerfolds",
+    "centrepiece": "centerpiece",
+    "centrepieces": "centerpieces",
+    "centres": "centers",
+    "channelled": "channeled",
+    "channelling": "channeling",
+    "characterise": "characterize",
+    "characterised": "characterized",
+    "characterises": "characterizes",
+    "characterising": "characterizing",
+    "cheque": "check",
+    "chequebook": "checkbook",
+    "chequebooks": "checkbooks",
+    "chequered": "checkered",
+    "cheques": "checks",
+    "chilli": "chili",
+    "chimaera": "chimera",
+    "chimaeras": "chimeras",
+    "chiselled": "chiseled",
+    "chiselling": "chiseling",
+    "circularise": "circularize",
+    "circularised": "circularized",
+    "circularises": "circularizes",
+    "circularising": "circularizing",
+    "civilise": "civilize",
+    "civilised": "civilized",
+    "civilises": "civilizes",
+    "civilising": "civilizing",
+    "clamour": "clamor",
+    "clamoured": "clamored",
+    "clamouring": "clamoring",
+    "clamours": "clamors",
+    "clangour": "clangor",
+    "clarinettist": "clarinetist",
+    "clarinettists": "clarinetists",
+    "collectivise": "collectivize",
+    "collectivised": "collectivized",
+    "collectivises": "collectivizes",
+    "collectivising": "collectivizing",
+    "colonisation": "colonization",
+    "colonise": "colonize",
+    "colonised": "colonized",
+    "coloniser": "colonizer",
+    "colonisers": "colonizers",
+    "colonises": "colonizes",
+    "colonising": "colonizing",
+    "colour": "color",
+    "colourant": "colorant",
+    "colourants": "colorants",
+    "coloured": "colored",
+    "coloureds": "coloreds",
+    "colourful": "colorful",
+    "colourfully": "colorfully",
+    "colouring": "coloring",
+    "colourize": "colorize",
+    "colourized": "colorized",
+    "colourizes": "colorizes",
+    "colourizing": "colorizing",
+    "colourless": "colorless",
+    "colours": "colors",
+    "commercialise": "commercialize",
+    "commercialised": "commercialized",
+    "commercialises": "commercializes",
+    "commercialising": "commercializing",
+    "compartmentalise": "compartmentalize",
+    "compartmentalised": "compartmentalized",
+    "compartmentalises": "compartmentalizes",
+    "compartmentalising": "compartmentalizing",
+    "computerise": "computerize",
+    "computerised": "computerized",
+    "computerises": "computerizes",
+    "computerising": "computerizing",
+    "conceptualise": "conceptualize",
+    "conceptualised": "conceptualized",
+    "conceptualises": "conceptualizes",
+    "conceptualising": "conceptualizing",
+    "connexion": "connection",
+    "connexions": "connections",
+    "contextualise": "contextualize",
+    "contextualised": "contextualized",
+    "contextualises": "contextualizes",
+    "contextualising": "contextualizing",
+    "cosier": "cozier",
+    "cosies": "cozies",
+    "cosiest": "coziest",
+    "cosily": "cozily",
+    "cosiness": "coziness",
+    "cosy": "cozy",
+    "councillor": "councilor",
+    "councillors": "councilors",
+    "counselled": "counseled",
+    "counselling": "counseling",
+    "counsellor": "counselor",
+    "counsellors": "counselors",
+    "crenellated": "crenelated",
+    "criminalise": "criminalize",
+    "criminalised": "criminalized",
+    "criminalises": "criminalizes",
+    "criminalising": "criminalizing",
+    "criticise": "criticize",
+    "criticised": "criticized",
+    "criticises": "criticizes",
+    "criticising": "criticizing",
+    "crueller": "crueler",
+    "cruellest": "cruelest",
+    "crystallisation": "crystallization",
+    "crystallise": "crystallize",
+    "crystallised": "crystallized",
+    "crystallises": "crystallizes",
+    "crystallising": "crystallizing",
+    "cudgelled": "cudgeled",
+    "cudgelling": "cudgeling",
+    "customise": "customize",
+    "customised": "customized",
+    "customises": "customizes",
+    "customising": "customizing",
+    "cypher": "cipher",
+    "cyphers": "ciphers",
+    "decentralisation": "decentralization",
+    "decentralise": "decentralize",
+    "decentralised": "decentralized",
+    "decentralises": "decentralizes",
+    "decentralising": "decentralizing",
+    "decriminalisation": "decriminalization",
+    "decriminalise": "decriminalize",
+    "decriminalised": "decriminalized",
+    "decriminalises": "decriminalizes",
+    "decriminalising": "decriminalizing",
+    "defence": "defense",
+    "defenceless": "defenseless",
+    "defences": "defenses",
+    "dehumanisation": "dehumanization",
+    "dehumanise": "dehumanize",
+    "dehumanised": "dehumanized",
+    "dehumanises": "dehumanizes",
+    "dehumanising": "dehumanizing",
+    "demeanour": "demeanor",
+    "demilitarisation": "demilitarization",
+    "demilitarise": "demilitarize",
+    "demilitarised": "demilitarized",
+    "demilitarises": "demilitarizes",
+    "demilitarising": "demilitarizing",
+    "demobilisation": "demobilization",
+    "demobilise": "demobilize",
+    "demobilised": "demobilized",
+    "demobilises": "demobilizes",
+    "demobilising": "demobilizing",
+    "democratisation": "democratization",
+    "democratise": "democratize",
+    "democratised": "democratized",
+    "democratises": "democratizes",
+    "democratising": "democratizing",
+    "demonise": "demonize",
+    "demonised": "demonized",
+    "demonises": "demonizes",
+    "demonising": "demonizing",
+    "demoralisation": "demoralization",
+    "demoralise": "demoralize",
+    "demoralised": "demoralized",
+    "demoralises": "demoralizes",
+    "demoralising": "demoralizing",
+    "denationalisation": "denationalization",
+    "denationalise": "denationalize",
+    "denationalised": "denationalized",
+    "denationalises": "denationalizes",
+    "denationalising": "denationalizing",
+    "deodorise": "deodorize",
+    "deodorised": "deodorized",
+    "deodorises": "deodorizes",
+    "deodorising": "deodorizing",
+    "depersonalise": "depersonalize",
+    "depersonalised": "depersonalized",
+    "depersonalises": "depersonalizes",
+    "depersonalising": "depersonalizing",
+    "deputise": "deputize",
+    "deputised": "deputized",
+    "deputises": "deputizes",
+    "deputising": "deputizing",
+    "desensitisation": "desensitization",
+    "desensitise": "desensitize",
+    "desensitised": "desensitized",
+    "desensitises": "desensitizes",
+    "desensitising": "desensitizing",
+    "destabilisation": "destabilization",
+    "destabilise": "destabilize",
+    "destabilised": "destabilized",
+    "destabilises": "destabilizes",
+    "destabilising": "destabilizing",
+    "dialled": "dialed",
+    "dialling": "dialing",
+    "dialogue": "dialog",
+    "dialogues": "dialogs",
+    "diarrhoea": "diarrhea",
+    "digitise": "digitize",
+    "digitised": "digitized",
+    "digitises": "digitizes",
+    "digitising": "digitizing",
+    "disc": "disk",
+    "discolour": "discolor",
+    "discoloured": "discolored",
+    "discolouring": "discoloring",
+    "discolours": "discolors",
+    "discs": "disks",
+    "disembowelled": "disemboweled",
+    "disembowelling": "disemboweling",
+    "disfavour": "disfavor",
+    "dishevelled": "disheveled",
+    "dishonour": "dishonor",
+    "dishonourable": "dishonorable",
+    "dishonourably": "dishonorably",
+    "dishonoured": "dishonored",
+    "dishonouring": "dishonoring",
+    "dishonours": "dishonors",
+    "disorganisation": "disorganization",
+    "disorganised": "disorganized",
+    "distil": "distill",
+    "distils": "distills",
+    "dramatisation": "dramatization",
+    "dramatisations": "dramatizations",
+    "dramatise": "dramatize",
+    "dramatised": "dramatized",
+    "dramatises": "dramatizes",
+    "dramatising": "dramatizing",
+    "draught": "draft",
+    "draughtboard": "draftboard",
+    "draughtboards": "draftboards",
+    "draughtier": "draftier",
+    "draughtiest": "draftiest",
+    "draughts": "drafts",
+    "draughtsman": "draftsman",
+    "draughtsmanship": "draftsmanship",
+    "draughtsmen": "draftsmen",
+    "draughtswoman": "draftswoman",
+    "draughtswomen": "draftswomen",
+    "draughty": "drafty",
+    "drivelled": "driveled",
+    "drivelling": "driveling",
+    "duelled": "dueled",
+    "duelling": "dueling",
+    "economise": "economize",
+    "economised": "economized",
+    "economises": "economizes",
+    "economising": "economizing",
+    "oedema": "edema",
+    "editorialise": "editorialize",
+    "editorialised": "editorialized",
+    "editorialises": "editorializes",
+    "editorialising": "editorializing",
+    "empathise": "empathize",
+    "empathised": "empathized",
+    "empathises": "empathizes",
+    "empathising": "empathizing",
+    "emphasise": "emphasize",
+    "emphasised": "emphasized",
+    "emphasises": "emphasizes",
+    "emphasising": "emphasizing",
+    "enamelled": "enameled",
+    "enamelling": "enameling",
+    "enamoured": "enamored",
+    "encyclopaedia": "encyclopedia",
+    "encyclopaedias": "encyclopedias",
+    "encyclopaedic": "encyclopedic",
+    "endeavour": "endeavor",
+    "endeavoured": "endeavored",
+    "endeavouring": "endeavoring",
+    "endeavours": "endeavors",
+    "energise": "energize",
+    "energised": "energized",
+    "energises": "energizes",
+    "energising": "energizing",
+    "enrol": "enroll",
+    "enrols": "enrolls",
+    "enthral": "enthrall",
+    "enthrals": "enthralls",
+    "epaulette": "epaulet",
+    "epaulettes": "epaulets",
+    "epicentre": "epicenter",
+    "epicentres": "epicenters",
+    "epilogue": "epilog",
+    "epilogues": "epilogs",
+    "epitomise": "epitomize",
+    "epitomised": "epitomized",
+    "epitomises": "epitomizes",
+    "epitomising": "epitomizing",
+    "equalisation": "equalization",
+    "equalise": "equalize",
+    "equalised": "equalized",
+    "equaliser": "equalizer",
+    "equalisers": "equalizers",
+    "equalises": "equalizes",
+    "equalising": "equalizing",
+    "eulogise": "eulogize",
+    "eulogised": "eulogized",
+    "eulogises": "eulogizes",
+    "eulogising": "eulogizing",
+    "evangelise": "evangelize",
+    "evangelised": "evangelized",
+    "evangelises": "evangelizes",
+    "evangelising": "evangelizing",
+    "exorcise": "exorcize",
+    "exorcised": "exorcized",
+    "exorcises": "exorcizes",
+    "exorcising": "exorcizing",
+    "extemporisation": "extemporization",
+    "extemporise": "extemporize",
+    "extemporised": "extemporized",
+    "extemporises": "extemporizes",
+    "extemporising": "extemporizing",
+    "externalisation": "externalization",
+    "externalisations": "externalizations",
+    "externalise": "externalize",
+    "externalised": "externalized",
+    "externalises": "externalizes",
+    "externalising": "externalizing",
+    "factorise": "factorize",
+    "factorised": "factorized",
+    "factorises": "factorizes",
+    "factorising": "factorizing",
+    "faecal": "fecal",
+    "faeces": "feces",
+    "familiarisation": "familiarization",
+    "familiarise": "familiarize",
+    "familiarised": "familiarized",
+    "familiarises": "familiarizes",
+    "familiarising": "familiarizing",
+    "fantasise": "fantasize",
+    "fantasised": "fantasized",
+    "fantasises": "fantasizes",
+    "fantasising": "fantasizing",
+    "favour": "favor",
+    "favourable": "favorable",
+    "favourably": "favorably",
+    "favoured": "favored",
+    "favouring": "favoring",
+    "favourite": "favorite",
+    "favourites": "favorites",
+    "favouritism": "favoritism",
+    "favours": "favors",
+    "feminise": "feminize",
+    "feminised": "feminized",
+    "feminises": "feminizes",
+    "feminising": "feminizing",
+    "fertilisation": "fertilization",
+    "fertilise": "fertilize",
+    "fertilised": "fertilized",
+    "fertiliser": "fertilizer",
+    "fertilisers": "fertilizers",
+    "fertilises": "fertilizes",
+    "fertilising": "fertilizing",
+    "fervour": "fervor",
+    "fibre": "fiber",
+    "fibreglass": "fiberglass",
+    "fibres": "fibers",
+    "fictionalisation": "fictionalization",
+    "fictionalisations": "fictionalizations",
+    "fictionalise": "fictionalize",
+    "fictionalised": "fictionalized",
+    "fictionalises": "fictionalizes",
+    "fictionalising": "fictionalizing",
+    "fillet": "filet",
+    "filleted": "fileted",
+    "filleting": "fileting",
+    "fillets": "filets",
+    "finalisation": "finalization",
+    "finalise": "finalize",
+    "finalised": "finalized",
+    "finalises": "finalizes",
+    "finalising": "finalizing",
+    "flautist": "flutist",
+    "flautists": "flutists",
+    "flavour": "flavor",
+    "flavoured": "flavored",
+    "flavouring": "flavoring",
+    "flavourings": "flavorings",
+    "flavourless": "flavorless",
+    "flavours": "flavors",
+    "flavoursome": "flavorsome",
+    "flyer": "flier",
+    "foetal": "fetal",
+    "foetid": "fetid",
+    "foetus": "fetus",
+    "foetuses": "fetuses",
+    "formalisation": "formalization",
+    "formalise": "formalize",
+    "formalised": "formalized",
+    "formalises": "formalizes",
+    "formalising": "formalizing",
+    "fossilisation": "fossilization",
+    "fossilise": "fossilize",
+    "fossilised": "fossilized",
+    "fossilises": "fossilizes",
+    "fossilising": "fossilizing",
+    "fraternisation": "fraternization",
+    "fraternise": "fraternize",
+    "fraternised": "fraternized",
+    "fraternises": "fraternizes",
+    "fraternising": "fraternizing",
+    "fulfil": "fulfill",
+    "fulfilment": "fulfillment",
+    "fulfils": "fulfills",
+    "funnelled": "funneled",
+    "funnelling": "funneling",
+    "galvanise": "galvanize",
+    "galvanised": "galvanized",
+    "galvanises": "galvanizes",
+    "galvanising": "galvanizing",
+    "gambolled": "gamboled",
+    "gambolling": "gamboling",
+    "gaol": "jail",
+    "gaolbird": "jailbird",
+    "gaolbirds": "jailbirds",
+    "gaolbreak": "jailbreak",
+    "gaolbreaks": "jailbreaks",
+    "gaoled": "jailed",
+    "gaoler": "jailer",
+    "gaolers": "jailers",
+    "gaoling": "jailing",
+    "gaols": "jails",
+    "gases": "gasses",
+    "gauge": "gage",
+    "gauged": "gaged",
+    "gauges": "gages",
+    "gauging": "gaging",
+    "generalisation": "generalization",
+    "generalisations": "generalizations",
+    "generalise": "generalize",
+    "generalised": "generalized",
+    "generalises": "generalizes",
+    "generalising": "generalizing",
+    "ghettoise": "ghettoize",
+    "ghettoised": "ghettoized",
+    "ghettoises": "ghettoizes",
+    "ghettoising": "ghettoizing",
+    "gipsies": "gypsies",
+    "glamorise": "glamorize",
+    "glamorised": "glamorized",
+    "glamorises": "glamorizes",
+    "glamorising": "glamorizing",
+    "glamour": "glamor",
+    "globalisation": "globalization",
+    "globalise": "globalize",
+    "globalised": "globalized",
+    "globalises": "globalizes",
+    "globalising": "globalizing",
+    "glueing": "gluing",
+    "goitre": "goiter",
+    "goitres": "goiters",
+    "gonorrhoea": "gonorrhea",
+    "gramme": "gram",
+    "grammes": "grams",
+    "gravelled": "graveled",
+    "grey": "gray",
+    "greyed": "grayed",
+    "greying": "graying",
+    "greyish": "grayish",
+    "greyness": "grayness",
+    "greys": "grays",
+    "grovelled": "groveled",
+    "grovelling": "groveling",
+    "groyne": "groin",
+    "groynes": "groins",
+    "gruelling": "grueling",
+    "gruellingly": "gruelingly",
+    "gryphon": "griffin",
+    "gryphons": "griffins",
+    "gynaecological": "gynecological",
+    "gynaecologist": "gynecologist",
+    "gynaecologists": "gynecologists",
+    "gynaecology": "gynecology",
+    "haematological": "hematological",
+    "haematologist": "hematologist",
+    "haematologists": "hematologists",
+    "haematology": "hematology",
+    "haemoglobin": "hemoglobin",
+    "haemophilia": "hemophilia",
+    "haemophiliac": "hemophiliac",
+    "haemophiliacs": "hemophiliacs",
+    "haemorrhage": "hemorrhage",
+    "haemorrhaged": "hemorrhaged",
+    "haemorrhages": "hemorrhages",
+    "haemorrhaging": "hemorrhaging",
+    "haemorrhoids": "hemorrhoids",
+    "harbour": "harbor",
+    "harboured": "harbored",
+    "harbouring": "harboring",
+    "harbours": "harbors",
+    "harmonisation": "harmonization",
+    "harmonise": "harmonize",
+    "harmonised": "harmonized",
+    "harmonises": "harmonizes",
+    "harmonising": "harmonizing",
+    "homoeopath": "homeopath",
+    "homoeopathic": "homeopathic",
+    "homoeopaths": "homeopaths",
+    "homoeopathy": "homeopathy",
+    "homogenise": "homogenize",
+    "homogenised": "homogenized",
+    "homogenises": "homogenizes",
+    "homogenising": "homogenizing",
+    "honour": "honor",
+    "honourable": "honorable",
+    "honourably": "honorably",
+    "honoured": "honored",
+    "honouring": "honoring",
+    "honours": "honors",
+    "hospitalisation": "hospitalization",
+    "hospitalise": "hospitalize",
+    "hospitalised": "hospitalized",
+    "hospitalises": "hospitalizes",
+    "hospitalising": "hospitalizing",
+    "humanise": "humanize",
+    "humanised": "humanized",
+    "humanises": "humanizes",
+    "humanising": "humanizing",
+    "humour": "humor",
+    "humoured": "humored",
+    "humouring": "humoring",
+    "humourless": "humorless",
+    "humours": "humors",
+    "hybridise": "hybridize",
+    "hybridised": "hybridized",
+    "hybridises": "hybridizes",
+    "hybridising": "hybridizing",
+    "hypnotise": "hypnotize",
+    "hypnotised": "hypnotized",
+    "hypnotises": "hypnotizes",
+    "hypnotising": "hypnotizing",
+    "hypothesise": "hypothesize",
+    "hypothesised": "hypothesized",
+    "hypothesises": "hypothesizes",
+    "hypothesising": "hypothesizing",
+    "idealisation": "idealization",
+    "idealise": "idealize",
+    "idealised": "idealized",
+    "idealises": "idealizes",
+    "idealising": "idealizing",
+    "idolise": "idolize",
+    "idolised": "idolized",
+    "idolises": "idolizes",
+    "idolising": "idolizing",
+    "immobilisation": "immobilization",
+    "immobilise": "immobilize",
+    "immobilised": "immobilized",
+    "immobiliser": "immobilizer",
+    "immobilisers": "immobilizers",
+    "immobilises": "immobilizes",
+    "immobilising": "immobilizing",
+    "immortalise": "immortalize",
+    "immortalised": "immortalized",
+    "immortalises": "immortalizes",
+    "immortalising": "immortalizing",
+    "immunisation": "immunization",
+    "immunise": "immunize",
+    "immunised": "immunized",
+    "immunises": "immunizes",
+    "immunising": "immunizing",
+    "impanelled": "impaneled",
+    "impanelling": "impaneling",
+    "imperilled": "imperiled",
+    "imperilling": "imperiling",
+    "individualise": "individualize",
+    "individualised": "individualized",
+    "individualises": "individualizes",
+    "individualising": "individualizing",
+    "industrialise": "industrialize",
+    "industrialised": "industrialized",
+    "industrialises": "industrializes",
+    "industrialising": "industrializing",
+    "inflexion": "inflection",
+    "inflexions": "inflections",
+    "initialise": "initialize",
+    "initialised": "initialized",
+    "initialises": "initializes",
+    "initialising": "initializing",
+    "initialled": "initialed",
+    "initialling": "initialing",
+    "instal": "install",
+    "instalment": "installment",
+    "instalments": "installments",
+    "instals": "installs",
+    "instil": "instill",
+    "instils": "instills",
+    "institutionalisation": "institutionalization",
+    "institutionalise": "institutionalize",
+    "institutionalised": "institutionalized",
+    "institutionalises": "institutionalizes",
+    "institutionalising": "institutionalizing",
+    "intellectualise": "intellectualize",
+    "intellectualised": "intellectualized",
+    "intellectualises": "intellectualizes",
+    "intellectualising": "intellectualizing",
+    "internalisation": "internalization",
+    "internalise": "internalize",
+    "internalised": "internalized",
+    "internalises": "internalizes",
+    "internalising": "internalizing",
+    "internationalisation": "internationalization",
+    "internationalise": "internationalize",
+    "internationalised": "internationalized",
+    "internationalises": "internationalizes",
+    "internationalising": "internationalizing",
+    "ionisation": "ionization",
+    "ionise": "ionize",
+    "ionised": "ionized",
+    "ioniser": "ionizer",
+    "ionisers": "ionizers",
+    "ionises": "ionizes",
+    "ionising": "ionizing",
+    "italicise": "italicize",
+    "italicised": "italicized",
+    "italicises": "italicizes",
+    "italicising": "italicizing",
+    "itemise": "itemize",
+    "itemised": "itemized",
+    "itemises": "itemizes",
+    "itemising": "itemizing",
+    "jeopardise": "jeopardize",
+    "jeopardised": "jeopardized",
+    "jeopardises": "jeopardizes",
+    "jeopardising": "jeopardizing",
+    "jewelled": "jeweled",
+    "jeweller": "jeweler",
+    "jewellers": "jewelers",
+    "jewellery": "jewelry",
+    "judgement": "judgment",
+    "kilogramme": "kilogram",
+    "kilogrammes": "kilograms",
+    "kilometre": "kilometer",
+    "kilometres": "kilometers",
+    "labelled": "labeled",
+    "labelling": "labeling",
+    "labour": "labor",
+    "laboured": "labored",
+    "labourer": "laborer",
+    "labourers": "laborers",
+    "labouring": "laboring",
+    "labours": "labors",
+    "lacklustre": "lackluster",
+    "legalisation": "legalization",
+    "legalise": "legalize",
+    "legalised": "legalized",
+    "legalises": "legalizes",
+    "legalising": "legalizing",
+    "legitimise": "legitimize",
+    "legitimised": "legitimized",
+    "legitimises": "legitimizes",
+    "legitimising": "legitimizing",
+    "leukaemia": "leukemia",
+    "levelled": "leveled",
+    "leveller": "leveler",
+    "levellers": "levelers",
+    "levelling": "leveling",
+    "libelled": "libeled",
+    "libelling": "libeling",
+    "libellous": "libelous",
+    "liberalisation": "liberalization",
+    "liberalise": "liberalize",
+    "liberalised": "liberalized",
+    "liberalises": "liberalizes",
+    "liberalising": "liberalizing",
+    "licence": "license",
+    "licenced": "licensed",
+    "licences": "licenses",
+    "licencing": "licensing",
+    "likeable": "likable",
+    "lionisation": "lionization",
+    "lionise": "lionize",
+    "lionised": "lionized",
+    "lionises": "lionizes",
+    "lionising": "lionizing",
+    "liquidise": "liquidize",
+    "liquidised": "liquidized",
+    "liquidiser": "liquidizer",
+    "liquidisers": "liquidizers",
+    "liquidises": "liquidizes",
+    "liquidising": "liquidizing",
+    "litre": "liter",
+    "litres": "liters",
+    "localise": "localize",
+    "localised": "localized",
+    "localises": "localizes",
+    "localising": "localizing",
+    "louvre": "louver",
+    "louvred": "louvered",
+    "louvres": "louvers",
+    "lustre": "luster",
+    "magnetise": "magnetize",
+    "magnetised": "magnetized",
+    "magnetises": "magnetizes",
+    "magnetising": "magnetizing",
+    "manoeuvrability": "maneuverability",
+    "manoeuvrable": "maneuverable",
+    "manoeuvre": "maneuver",
+    "manoeuvred": "maneuvered",
+    "manoeuvres": "maneuvers",
+    "manoeuvring": "maneuvering",
+    "manoeuvrings": "maneuverings",
+    "marginalisation": "marginalization",
+    "marginalise": "marginalize",
+    "marginalised": "marginalized",
+    "marginalises": "marginalizes",
+    "marginalising": "marginalizing",
+    "marshalled": "marshaled",
+    "marshalling": "marshaling",
+    "marvelled": "marveled",
+    "marvelling": "marveling",
+    "marvellous": "marvelous",
+    "marvellously": "marvelously",
+    "materialisation": "materialization",
+    "materialise": "materialize",
+    "materialised": "materialized",
+    "materialises": "materializes",
+    "materialising": "materializing",
+    "maximisation": "maximization",
+    "maximise": "maximize",
+    "maximised": "maximized",
+    "maximises": "maximizes",
+    "maximising": "maximizing",
+    "meagre": "meager",
+    "mechanisation": "mechanization",
+    "mechanise": "mechanize",
+    "mechanised": "mechanized",
+    "mechanises": "mechanizes",
+    "mechanising": "mechanizing",
+    "mediaeval": "medieval",
+    "memorialise": "memorialize",
+    "memorialised": "memorialized",
+    "memorialises": "memorializes",
+    "memorialising": "memorializing",
+    "memorise": "memorize",
+    "memorised": "memorized",
+    "memorises": "memorizes",
+    "memorising": "memorizing",
+    "mesmerise": "mesmerize",
+    "mesmerised": "mesmerized",
+    "mesmerises": "mesmerizes",
+    "mesmerising": "mesmerizing",
+    "metabolise": "metabolize",
+    "metabolised": "metabolized",
+    "metabolises": "metabolizes",
+    "metabolising": "metabolizing",
+    "metre": "meter",
+    "metres": "meters",
+    "micrometre": "micrometer",
+    "micrometres": "micrometers",
+    "militarise": "militarize",
+    "militarised": "militarized",
+    "militarises": "militarizes",
+    "militarising": "militarizing",
+    "milligramme": "milligram",
+    "milligrammes": "milligrams",
+    "millilitre": "milliliter",
+    "millilitres": "milliliters",
+    "millimetre": "millimeter",
+    "millimetres": "millimeters",
+    "miniaturisation": "miniaturization",
+    "miniaturise": "miniaturize",
+    "miniaturised": "miniaturized",
+    "miniaturises": "miniaturizes",
+    "miniaturising": "miniaturizing",
+    "minibuses": "minibusses",
+    "minimise": "minimize",
+    "minimised": "minimized",
+    "minimises": "minimizes",
+    "minimising": "minimizing",
+    "misbehaviour": "misbehavior",
+    "misdemeanour": "misdemeanor",
+    "misdemeanours": "misdemeanors",
+    "misspelt": "misspelled",
+    "mitre": "miter",
+    "mitres": "miters",
+    "mobilisation": "mobilization",
+    "mobilise": "mobilize",
+    "mobilised": "mobilized",
+    "mobilises": "mobilizes",
+    "mobilising": "mobilizing",
+    "modelled": "modeled",
+    "modeller": "modeler",
+    "modellers": "modelers",
+    "modelling": "modeling",
+    "modernise": "modernize",
+    "modernised": "modernized",
+    "modernises": "modernizes",
+    "modernising": "modernizing",
+    "moisturise": "moisturize",
+    "moisturised": "moisturized",
+    "moisturiser": "moisturizer",
+    "moisturisers": "moisturizers",
+    "moisturises": "moisturizes",
+    "moisturising": "moisturizing",
+    "monologue": "monolog",
+    "monologues": "monologs",
+    "monopolisation": "monopolization",
+    "monopolise": "monopolize",
+    "monopolised": "monopolized",
+    "monopolises": "monopolizes",
+    "monopolising": "monopolizing",
+    "moralise": "moralize",
+    "moralised": "moralized",
+    "moralises": "moralizes",
+    "moralising": "moralizing",
+    "motorised": "motorized",
+    "mould": "mold",
+    "moulded": "molded",
+    "moulder": "molder",
+    "mouldered": "moldered",
+    "mouldering": "moldering",
+    "moulders": "molders",
+    "mouldier": "moldier",
+    "mouldiest": "moldiest",
+    "moulding": "molding",
+    "mouldings": "moldings",
+    "moulds": "molds",
+    "mouldy": "moldy",
+    "moult": "molt",
+    "moulted": "molted",
+    "moulting": "molting",
+    "moults": "molts",
+    "moustache": "mustache",
+    "moustached": "mustached",
+    "moustaches": "mustaches",
+    "moustachioed": "mustachioed",
+    "multicoloured": "multicolored",
+    "nationalisation": "nationalization",
+    "nationalisations": "nationalizations",
+    "nationalise": "nationalize",
+    "nationalised": "nationalized",
+    "nationalises": "nationalizes",
+    "nationalising": "nationalizing",
+    "naturalisation": "naturalization",
+    "naturalise": "naturalize",
+    "naturalised": "naturalized",
+    "naturalises": "naturalizes",
+    "naturalising": "naturalizing",
+    "neighbour": "neighbor",
+    "neighbourhood": "neighborhood",
+    "neighbourhoods": "neighborhoods",
+    "neighbouring": "neighboring",
+    "neighbourliness": "neighborliness",
+    "neighbourly": "neighborly",
+    "neighbours": "neighbors",
+    "neutralisation": "neutralization",
+    "neutralise": "neutralize",
+    "neutralised": "neutralized",
+    "neutralises": "neutralizes",
+    "neutralising": "neutralizing",
+    "normalisation": "normalization",
+    "normalise": "normalize",
+    "normalised": "normalized",
+    "normalises": "normalizes",
+    "normalising": "normalizing",
+    "odour": "odor",
+    "odourless": "odorless",
+    "odours": "odors",
+    "oesophagus": "esophagus",
+    "oesophaguses": "esophaguses",
+    "oestrogen": "estrogen",
+    "offence": "offense",
+    "offences": "offenses",
+    "omelette": "omelet",
+    "omelettes": "omelets",
+    "optimise": "optimize",
+    "optimised": "optimized",
+    "optimises": "optimizes",
+    "optimising": "optimizing",
+    "organisation": "organization",
+    "organisational": "organizational",
+    "organisations": "organizations",
+    "organise": "organize",
+    "organised": "organized",
+    "organiser": "organizer",
+    "organisers": "organizers",
+    "organises": "organizes",
+    "organising": "organizing",
+    "orthopaedic": "orthopedic",
+    "orthopaedics": "orthopedics",
+    "ostracise": "ostracize",
+    "ostracised": "ostracized",
+    "ostracises": "ostracizes",
+    "ostracising": "ostracizing",
+    "outmanoeuvre": "outmaneuver",
+    "outmanoeuvred": "outmaneuvered",
+    "outmanoeuvres": "outmaneuvers",
+    "outmanoeuvring": "outmaneuvering",
+    "overemphasise": "overemphasize",
+    "overemphasised": "overemphasized",
+    "overemphasises": "overemphasizes",
+    "overemphasising": "overemphasizing",
+    "oxidisation": "oxidization",
+    "oxidise": "oxidize",
+    "oxidised": "oxidized",
+    "oxidises": "oxidizes",
+    "oxidising": "oxidizing",
+    "paederast": "pederast",
+    "paederasts": "pederasts",
+    "paediatric": "pediatric",
+    "paediatrician": "pediatrician",
+    "paediatricians": "pediatricians",
+    "paediatrics": "pediatrics",
+    "paedophile": "pedophile",
+    "paedophiles": "pedophiles",
+    "paedophilia": "pedophilia",
+    "palaeolithic": "paleolithic",
+    "palaeontologist": "paleontologist",
+    "palaeontologists": "paleontologists",
+    "palaeontology": "paleontology",
+    "panelled": "paneled",
+    "panelling": "paneling",
+    "panellist": "panelist",
+    "panellists": "panelists",
+    "paralyse": "paralyze",
+    "paralysed": "paralyzed",
+    "paralyses": "paralyzes",
+    "paralysing": "paralyzing",
+    "parcelled": "parceled",
+    "parcelling": "parceling",
+    "parlour": "parlor",
+    "parlours": "parlors",
+    "particularise": "particularize",
+    "particularised": "particularized",
+    "particularises": "particularizes",
+    "particularising": "particularizing",
+    "passivisation": "passivization",
+    "passivise": "passivize",
+    "passivised": "passivized",
+    "passivises": "passivizes",
+    "passivising": "passivizing",
+    "pasteurisation": "pasteurization",
+    "pasteurise": "pasteurize",
+    "pasteurised": "pasteurized",
+    "pasteurises": "pasteurizes",
+    "pasteurising": "pasteurizing",
+    "patronise": "patronize",
+    "patronised": "patronized",
+    "patronises": "patronizes",
+    "patronising": "patronizing",
+    "patronisingly": "patronizingly",
+    "pedalled": "pedaled",
+    "pedalling": "pedaling",
+    "pedestrianisation": "pedestrianization",
+    "pedestrianise": "pedestrianize",
+    "pedestrianised": "pedestrianized",
+    "pedestrianises": "pedestrianizes",
+    "pedestrianising": "pedestrianizing",
+    "penalise": "penalize",
+    "penalised": "penalized",
+    "penalises": "penalizes",
+    "penalising": "penalizing",
+    "pencilled": "penciled",
+    "pencilling": "penciling",
+    "personalise": "personalize",
+    "personalised": "personalized",
+    "personalises": "personalizes",
+    "personalising": "personalizing",
+    "pharmacopoeia": "pharmacopeia",
+    "pharmacopoeias": "pharmacopeias",
+    "philosophise": "philosophize",
+    "philosophised": "philosophized",
+    "philosophises": "philosophizes",
+    "philosophising": "philosophizing",
+    "philtre": "philter",
+    "philtres": "philters",
+    "phoney": "phony",
+    "plagiarise": "plagiarize",
+    "plagiarised": "plagiarized",
+    "plagiarises": "plagiarizes",
+    "plagiarising": "plagiarizing",
+    "plough": "plow",
+    "ploughed": "plowed",
+    "ploughing": "plowing",
+    "ploughman": "plowman",
+    "ploughmen": "plowmen",
+    "ploughs": "plows",
+    "ploughshare": "plowshare",
+    "ploughshares": "plowshares",
+    "polarisation": "polarization",
+    "polarise": "polarize",
+    "polarised": "polarized",
+    "polarises": "polarizes",
+    "polarising": "polarizing",
+    "politicisation": "politicization",
+    "politicise": "politicize",
+    "politicised": "politicized",
+    "politicises": "politicizes",
+    "politicising": "politicizing",
+    "popularisation": "popularization",
+    "popularise": "popularize",
+    "popularised": "popularized",
+    "popularises": "popularizes",
+    "popularising": "popularizing",
+    "pouffe": "pouf",
+    "pouffes": "poufs",
+    "practise": "practice",
+    "practised": "practiced",
+    "practises": "practices",
+    "practising": "practicing",
+    "praesidium": "presidium",
+    "praesidiums": "presidiums",
+    "pressurisation": "pressurization",
+    "pressurise": "pressurize",
+    "pressurised": "pressurized",
+    "pressurises": "pressurizes",
+    "pressurising": "pressurizing",
+    "pretence": "pretense",
+    "pretences": "pretenses",
+    "primaeval": "primeval",
+    "prioritisation": "prioritization",
+    "prioritise": "prioritize",
+    "prioritised": "prioritized",
+    "prioritises": "prioritizes",
+    "prioritising": "prioritizing",
+    "privatisation": "privatization",
+    "privatisations": "privatizations",
+    "privatise": "privatize",
+    "privatised": "privatized",
+    "privatises": "privatizes",
+    "privatising": "privatizing",
+    "professionalisation": "professionalization",
+    "professionalise": "professionalize",
+    "professionalised": "professionalized",
+    "professionalises": "professionalizes",
+    "professionalising": "professionalizing",
+    "programme": "program",
+    "programmes": "programs",
+    "prologue": "prolog",
+    "prologues": "prologs",
+    "propagandise": "propagandize",
+    "propagandised": "propagandized",
+    "propagandises": "propagandizes",
+    "propagandising": "propagandizing",
+    "proselytise": "proselytize",
+    "proselytised": "proselytized",
+    "proselytiser": "proselytizer",
+    "proselytisers": "proselytizers",
+    "proselytises": "proselytizes",
+    "proselytising": "proselytizing",
+    "psychoanalyse": "psychoanalyze",
+    "psychoanalysed": "psychoanalyzed",
+    "psychoanalyses": "psychoanalyzes",
+    "psychoanalysing": "psychoanalyzing",
+    "publicise": "publicize",
+    "publicised": "publicized",
+    "publicises": "publicizes",
+    "publicising": "publicizing",
+    "pulverisation": "pulverization",
+    "pulverise": "pulverize",
+    "pulverised": "pulverized",
+    "pulverises": "pulverizes",
+    "pulverising": "pulverizing",
+    "pummelled": "pummeled",
+    "pummelling": "pummeling",
+    "pyjama": "pajama",
+    "pyjamas": "pajamas",
+    "pzazz": "pizzazz",
+    "quarrelled": "quarreled",
+    "quarrelling": "quarreling",
+    "radicalise": "radicalize",
+    "radicalised": "radicalized",
+    "radicalises": "radicalizes",
+    "radicalising": "radicalizing",
+    "rancour": "rancor",
+    "randomise": "randomize",
+    "randomised": "randomized",
+    "randomises": "randomizes",
+    "randomising": "randomizing",
+    "rationalisation": "rationalization",
+    "rationalisations": "rationalizations",
+    "rationalise": "rationalize",
+    "rationalised": "rationalized",
+    "rationalises": "rationalizes",
+    "rationalising": "rationalizing",
+    "ravelled": "raveled",
+    "ravelling": "raveling",
+    "realisable": "realizable",
+    "realisation": "realization",
+    "realisations": "realizations",
+    "realise": "realize",
+    "realised": "realized",
+    "realises": "realizes",
+    "realising": "realizing",
+    "recognisable": "recognizable",
+    "recognisably": "recognizably",
+    "recognisance": "recognizance",
+    "recognise": "recognize",
+    "recognised": "recognized",
+    "recognises": "recognizes",
+    "recognising": "recognizing",
+    "reconnoitre": "reconnoiter",
+    "reconnoitred": "reconnoitered",
+    "reconnoitres": "reconnoiters",
+    "reconnoitring": "reconnoitering",
+    "refuelled": "refueled",
+    "refuelling": "refueling",
+    "regularisation": "regularization",
+    "regularise": "regularize",
+    "regularised": "regularized",
+    "regularises": "regularizes",
+    "regularising": "regularizing",
+    "remodelled": "remodeled",
+    "remodelling": "remodeling",
+    "remould": "remold",
+    "remoulded": "remolded",
+    "remoulding": "remolding",
+    "remoulds": "remolds",
+    "reorganisation": "reorganization",
+    "reorganisations": "reorganizations",
+    "reorganise": "reorganize",
+    "reorganised": "reorganized",
+    "reorganises": "reorganizes",
+    "reorganising": "reorganizing",
+    "revelled": "reveled",
+    "reveller": "reveler",
+    "revellers": "revelers",
+    "revelling": "reveling",
+    "revitalise": "revitalize",
+    "revitalised": "revitalized",
+    "revitalises": "revitalizes",
+    "revitalising": "revitalizing",
+    "revolutionise": "revolutionize",
+    "revolutionised": "revolutionized",
+    "revolutionises": "revolutionizes",
+    "revolutionising": "revolutionizing",
+    "rhapsodise": "rhapsodize",
+    "rhapsodised": "rhapsodized",
+    "rhapsodises": "rhapsodizes",
+    "rhapsodising": "rhapsodizing",
+    "rigour": "rigor",
+    "rigours": "rigors",
+    "ritualised": "ritualized",
+    "rivalled": "rivaled",
+    "rivalling": "rivaling",
+    "romanticise": "romanticize",
+    "romanticised": "romanticized",
+    "romanticises": "romanticizes",
+    "romanticising": "romanticizing",
+    "rumour": "rumor",
+    "rumoured": "rumored",
+    "rumours": "rumors",
+    "sabre": "saber",
+    "sabres": "sabers",
+    "saltpetre": "saltpeter",
+    "sanitise": "sanitize",
+    "sanitised": "sanitized",
+    "sanitises": "sanitizes",
+    "sanitising": "sanitizing",
+    "satirise": "satirize",
+    "satirised": "satirized",
+    "satirises": "satirizes",
+    "satirising": "satirizing",
+    "saviour": "savior",
+    "saviours": "saviors",
+    "savour": "savor",
+    "savoured": "savored",
+    "savouries": "savories",
+    "savouring": "savoring",
+    "savours": "savors",
+    "savoury": "savory",
+    "scandalise": "scandalize",
+    "scandalised": "scandalized",
+    "scandalises": "scandalizes",
+    "scandalising": "scandalizing",
+    "sceptic": "skeptic",
+    "sceptical": "skeptical",
+    "sceptically": "skeptically",
+    "scepticism": "skepticism",
+    "sceptics": "skeptics",
+    "sceptre": "scepter",
+    "sceptres": "scepters",
+    "scrutinise": "scrutinize",
+    "scrutinised": "scrutinized",
+    "scrutinises": "scrutinizes",
+    "scrutinising": "scrutinizing",
+    "secularisation": "secularization",
+    "secularise": "secularize",
+    "secularised": "secularized",
+    "secularises": "secularizes",
+    "secularising": "secularizing",
+    "sensationalise": "sensationalize",
+    "sensationalised": "sensationalized",
+    "sensationalises": "sensationalizes",
+    "sensationalising": "sensationalizing",
+    "sensitise": "sensitize",
+    "sensitised": "sensitized",
+    "sensitises": "sensitizes",
+    "sensitising": "sensitizing",
+    "sentimentalise": "sentimentalize",
+    "sentimentalised": "sentimentalized",
+    "sentimentalises": "sentimentalizes",
+    "sentimentalising": "sentimentalizing",
+    "sepulchre": "sepulcher",
+    "sepulchres": "sepulchers",
+    "serialisation": "serialization",
+    "serialisations": "serializations",
+    "serialise": "serialize",
+    "serialised": "serialized",
+    "serialises": "serializes",
+    "serialising": "serializing",
+    "sermonise": "sermonize",
+    "sermonised": "sermonized",
+    "sermonises": "sermonizes",
+    "sermonising": "sermonizing",
+    "sheikh": "sheik",
+    "shovelled": "shoveled",
+    "shovelling": "shoveling",
+    "shrivelled": "shriveled",
+    "shrivelling": "shriveling",
+    "signalise": "signalize",
+    "signalised": "signalized",
+    "signalises": "signalizes",
+    "signalising": "signalizing",
+    "signalled": "signaled",
+    "signalling": "signaling",
+    "smoulder": "smolder",
+    "smouldered": "smoldered",
+    "smouldering": "smoldering",
+    "smoulders": "smolders",
+    "snivelled": "sniveled",
+    "snivelling": "sniveling",
+    "snorkelled": "snorkeled",
+    "snorkelling": "snorkeling",
+    "snowplough": "snowplow",
+    "snowploughs": "snowplows",
+    "socialisation": "socialization",
+    "socialise": "socialize",
+    "socialised": "socialized",
+    "socialises": "socializes",
+    "socialising": "socializing",
+    "sodomise": "sodomize",
+    "sodomised": "sodomized",
+    "sodomises": "sodomizes",
+    "sodomising": "sodomizing",
+    "solemnise": "solemnize",
+    "solemnised": "solemnized",
+    "solemnises": "solemnizes",
+    "solemnising": "solemnizing",
+    "sombre": "somber",
+    "specialisation": "specialization",
+    "specialisations": "specializations",
+    "specialise": "specialize",
+    "specialised": "specialized",
+    "specialises": "specializes",
+    "specialising": "specializing",
+    "spectre": "specter",
+    "spectres": "specters",
+    "spiralled": "spiraled",
+    "spiralling": "spiraling",
+    "splendour": "splendor",
+    "splendours": "splendors",
+    "squirrelled": "squirreled",
+    "squirrelling": "squirreling",
+    "stabilisation": "stabilization",
+    "stabilise": "stabilize",
+    "stabilised": "stabilized",
+    "stabiliser": "stabilizer",
+    "stabilisers": "stabilizers",
+    "stabilises": "stabilizes",
+    "stabilising": "stabilizing",
+    "standardisation": "standardization",
+    "standardise": "standardize",
+    "standardised": "standardized",
+    "standardises": "standardizes",
+    "standardising": "standardizing",
+    "stencilled": "stenciled",
+    "stencilling": "stenciling",
+    "sterilisation": "sterilization",
+    "sterilisations": "sterilizations",
+    "sterilise": "sterilize",
+    "sterilised": "sterilized",
+    "steriliser": "sterilizer",
+    "sterilisers": "sterilizers",
+    "sterilises": "sterilizes",
+    "sterilising": "sterilizing",
+    "stigmatisation": "stigmatization",
+    "stigmatise": "stigmatize",
+    "stigmatised": "stigmatized",
+    "stigmatises": "stigmatizes",
+    "stigmatising": "stigmatizing",
+    "storey": "story",
+    "storeys": "stories",
+    "subsidisation": "subsidization",
+    "subsidise": "subsidize",
+    "subsidised": "subsidized",
+    "subsidiser": "subsidizer",
+    "subsidisers": "subsidizers",
+    "subsidises": "subsidizes",
+    "subsidising": "subsidizing",
+    "succour": "succor",
+    "succoured": "succored",
+    "succouring": "succoring",
+    "succours": "succors",
+    "sulphate": "sulfate",
+    "sulphates": "sulfates",
+    "sulphide": "sulfide",
+    "sulphides": "sulfides",
+    "sulphur": "sulfur",
+    "sulphurous": "sulfurous",
+    "summarise": "summarize",
+    "summarised": "summarized",
+    "summarises": "summarizes",
+    "summarising": "summarizing",
+    "swivelled": "swiveled",
+    "swivelling": "swiveling",
+    "symbolise": "symbolize",
+    "symbolised": "symbolized",
+    "symbolises": "symbolizes",
+    "symbolising": "symbolizing",
+    "sympathise": "sympathize",
+    "sympathised": "sympathized",
+    "sympathiser": "sympathizer",
+    "sympathisers": "sympathizers",
+    "sympathises": "sympathizes",
+    "sympathising": "sympathizing",
+    "synchronisation": "synchronization",
+    "synchronise": "synchronize",
+    "synchronised": "synchronized",
+    "synchronises": "synchronizes",
+    "synchronising": "synchronizing",
+    "synthesise": "synthesize",
+    "synthesised": "synthesized",
+    "synthesiser": "synthesizer",
+    "synthesisers": "synthesizers",
+    "synthesises": "synthesizes",
+    "synthesising": "synthesizing",
+    "syphon": "siphon",
+    "syphoned": "siphoned",
+    "syphoning": "siphoning",
+    "syphons": "siphons",
+    "systematisation": "systematization",
+    "systematise": "systematize",
+    "systematised": "systematized",
+    "systematises": "systematizes",
+    "systematising": "systematizing",
+    "tantalise": "tantalize",
+    "tantalised": "tantalized",
+    "tantalises": "tantalizes",
+    "tantalising": "tantalizing",
+    "tantalisingly": "tantalizingly",
+    "tasselled": "tasseled",
+    "technicolour": "technicolor",
+    "temporise": "temporize",
+    "temporised": "temporized",
+    "temporises": "temporizes",
+    "temporising": "temporizing",
+    "tenderise": "tenderize",
+    "tenderised": "tenderized",
+    "tenderises": "tenderizes",
+    "tenderising": "tenderizing",
+    "terrorise": "terrorize",
+    "terrorised": "terrorized",
+    "terrorises": "terrorizes",
+    "terrorising": "terrorizing",
+    "theatre": "theater",
+    "theatregoer": "theatergoer",
+    "theatregoers": "theatergoers",
+    "theatres": "theaters",
+    "theorise": "theorize",
+    "theorised": "theorized",
+    "theorises": "theorizes",
+    "theorising": "theorizing",
+    "tonne": "ton",
+    "tonnes": "tons",
+    "towelled": "toweled",
+    "towelling": "toweling",
+    "toxaemia": "toxemia",
+    "tranquillise": "tranquilize",
+    "tranquillised": "tranquilized",
+    "tranquilliser": "tranquilizer",
+    "tranquillisers": "tranquilizers",
+    "tranquillises": "tranquilizes",
+    "tranquillising": "tranquilizing",
+    "tranquillity": "tranquility",
+    "tranquillize": "tranquilize",
+    "tranquillized": "tranquilized",
+    "tranquillizer": "tranquilizer",
+    "tranquillizers": "tranquilizers",
+    "tranquillizes": "tranquilizes",
+    "tranquillizing": "tranquilizing",
+    "tranquilly": "tranquility",
+    "transistorised": "transistorized",
+    "traumatise": "traumatize",
+    "traumatised": "traumatized",
+    "traumatises": "traumatizes",
+    "traumatising": "traumatizing",
+    "travelled": "traveled",
+    "traveller": "traveler",
+    "travellers": "travelers",
+    "travelling": "traveling",
+    "travelogue": "travelog",
+    "travelogues": "travelogs",
+    "trialled": "trialed",
+    "trialling": "trialing",
+    "tricolour": "tricolor",
+    "tricolours": "tricolors",
+    "trivialise": "trivialize",
+    "trivialised": "trivialized",
+    "trivialises": "trivializes",
+    "trivialising": "trivializing",
+    "tumour": "tumor",
+    "tumours": "tumors",
+    "tunnelled": "tunneled",
+    "tunnelling": "tunneling",
+    "tyrannise": "tyrannize",
+    "tyrannised": "tyrannized",
+    "tyrannises": "tyrannizes",
+    "tyrannising": "tyrannizing",
+    "tyre": "tire",
+    "tyres": "tires",
+    "unauthorised": "unauthorized",
+    "uncivilised": "uncivilized",
+    "underutilised": "underutilized",
+    "unequalled": "unequaled",
+    "unfavourable": "unfavorable",
+    "unfavourably": "unfavorably",
+    "unionisation": "unionization",
+    "unionise": "unionize",
+    "unionised": "unionized",
+    "unionises": "unionizes",
+    "unionising": "unionizing",
+    "unorganised": "unorganized",
+    "unravelled": "unraveled",
+    "unravelling": "unraveling",
+    "unrecognisable": "unrecognizable",
+    "unrecognised": "unrecognized",
+    "unrivalled": "unrivaled",
+    "unsavoury": "unsavory",
+    "untrammelled": "untrammeled",
+    "urbanisation": "urbanization",
+    "urbanise": "urbanize",
+    "urbanised": "urbanized",
+    "urbanises": "urbanizes",
+    "urbanising": "urbanizing",
+    "utilisable": "utilizable",
+    "utilisation": "utilization",
+    "utilise": "utilize",
+    "utilised": "utilized",
+    "utilises": "utilizes",
+    "utilising": "utilizing",
+    "valour": "valor",
+    "vandalise": "vandalize",
+    "vandalised": "vandalized",
+    "vandalises": "vandalizes",
+    "vandalising": "vandalizing",
+    "vaporisation": "vaporization",
+    "vaporise": "vaporize",
+    "vaporised": "vaporized",
+    "vaporises": "vaporizes",
+    "vaporising": "vaporizing",
+    "vapour": "vapor",
+    "vapours": "vapors",
+    "verbalise": "verbalize",
+    "verbalised": "verbalized",
+    "verbalises": "verbalizes",
+    "verbalising": "verbalizing",
+    "victimisation": "victimization",
+    "victimise": "victimize",
+    "victimised": "victimized",
+    "victimises": "victimizes",
+    "victimising": "victimizing",
+    "videodisc": "videodisk",
+    "videodiscs": "videodisks",
+    "vigour": "vigor",
+    "visualisation": "visualization",
+    "visualisations": "visualizations",
+    "visualise": "visualize",
+    "visualised": "visualized",
+    "visualises": "visualizes",
+    "visualising": "visualizing",
+    "vocalisation": "vocalization",
+    "vocalisations": "vocalizations",
+    "vocalise": "vocalize",
+    "vocalised": "vocalized",
+    "vocalises": "vocalizes",
+    "vocalising": "vocalizing",
+    "vulcanised": "vulcanized",
+    "vulgarisation": "vulgarization",
+    "vulgarise": "vulgarize",
+    "vulgarised": "vulgarized",
+    "vulgarises": "vulgarizes",
+    "vulgarising": "vulgarizing",
+    "waggon": "wagon",
+    "waggons": "wagons",
+    "watercolour": "watercolor",
+    "watercolours": "watercolors",
+    "weaselled": "weaseled",
+    "weaselling": "weaseling",
+    "westernisation": "westernization",
+    "westernise": "westernize",
+    "westernised": "westernized",
+    "westernises": "westernizes",
+    "westernising": "westernizing",
+    "womanise": "womanize",
+    "womanised": "womanized",
+    "womaniser": "womanizer",
+    "womanisers": "womanizers",
+    "womanises": "womanizes",
+    "womanising": "womanizing",
+    "woollen": "woolen",
+    "woollens": "woolens",
+    "woollies": "woolies",
+    "woolly": "wooly",
+    "worshipped": "worshiped",
+    "worshipping": "worshiping",
+    "worshipper": "worshiper",
+    "yodelled": "yodeled",
+    "yodelling": "yodeling",
+    "yoghourt": "yogurt",
+    "yoghourts": "yogurts",
+    "yoghurt": "yogurt",
+    "yoghurts": "yogurts"
+}
+
+
+for string, norm in list(_exc.items()):  # snapshot: the loop adds keys to _exc
+    _exc[string.title()] = norm  # title-cased variant maps to the same norm
+
+
+NORM_EXCEPTIONS = _exc  # exported name for the table extended above