From 955b95cb8ba5185525e736089f7c81743f32efa0 Mon Sep 17 00:00:00 2001
From: Brad Jascob <bjascob@msn.com>
Date: Sat, 4 May 2019 10:16:03 -0600
Subject: [PATCH] Fix inconsistent lemmatizer issue #3484 (#3646)

* Fix inconsistent lemmatizer issue #3484

* Remove test case
---
 spacy/lemmatizer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py
index 1aea308f9..f9e35f44a 100644
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -1,5 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals
+from collections import OrderedDict
 
 from .symbols import POS, NOUN, VERB, ADJ, PUNCT, PROPN
 from .symbols import VerbForm_inf, VerbForm_none, Number_sing, Degree_pos
@@ -118,8 +119,8 @@ def lemmatize(string, index, exceptions, rules):
                 forms.append(form)
             else:
                 oov_forms.append(form)
-    # Remove duplicates, and sort forms generated by rules alphabetically.
-    forms = list(set(forms))
+    # Remove duplicates but preserve the ordering of applied "rules"
+    forms = list(OrderedDict.fromkeys(forms))
     # Put exceptions at the front of the list, so they get priority.
     # This is a dodgy heuristic -- but it's the best we can do until we get
     # frequencies on this. We can at least prune out problematic exceptions,