From 0a6d7ca2006d520883361d9922282679c4d2d6cc Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 8 Mar 2017 14:33:32 +0100
Subject: [PATCH] Fix spacing after token_match

The boolean flag indicating a space after the token was being set
incorrectly after the token_match regex was applied. Fixes #859.
---
 spacy/tokenizer.pyx | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 8f2f111e7..1b74431ff 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -275,7 +275,10 @@ cdef class Tokenizer:
         if cache_hit:
             pass
         elif self.token_match and self.token_match(string):
-            tokens.push_back(self.vocab.get(tokens.mem, string), not suffixes.size())
+            # We're always saying 'no' to spaces here -- the caller will
+            # fix up the outermost one, with reference to the original.
+            # See Issue #859
+            tokens.push_back(self.vocab.get(tokens.mem, string), False)
         else:
             matches = self.find_infix(string)
             if not matches:
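
A minimal reproduction sketch of the behaviour this patch restores (not part of the
commit; it assumes the spaCy v1.x-era spacy.load('en') entry point and an installed
English model): a token captured by token_match, such as a URL, should keep the
trailing-space flag taken from the original string, so joining tokens with their
whitespace reproduces the input exactly.

    # Hypothetical regression check for the spacing bug (not from the commit).
    # Assumes Token.text_with_ws, which returns the token text plus any
    # trailing whitespace recorded during tokenization.
    import spacy

    nlp = spacy.load('en')
    text = u"See http://example.com for details."
    doc = nlp(text)

    # Before this fix, the space after a token matched by token_match
    # (here the URL) could be dropped, breaking the round trip.
    assert ''.join(t.text_with_ws for t in doc) == text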