From ae348bee4327f132576a09ffafc8422075407d93 Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Thu, 26 Sep 2019 14:43:22 +0200
Subject: [PATCH] Switch to PhraseMatcher.find_matches

---
 spacy/tokenizer.pxd | 3 ++-
 spacy/tokenizer.pyx | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/spacy/tokenizer.pxd b/spacy/tokenizer.pxd
index 69e80250c..506076ac1 100644
--- a/spacy/tokenizer.pxd
+++ b/spacy/tokenizer.pxd
@@ -8,6 +8,7 @@ from .structs cimport LexemeC, TokenC
 from .strings cimport StringStore
 from .tokens.doc cimport Doc
 from .vocab cimport Vocab, LexemesOrTokens, _Cached
+from .matcher.phrasematcher cimport PhraseMatcher, MatchStruct
 
 
 cdef class Tokenizer:
@@ -21,7 +22,7 @@ cdef class Tokenizer:
     cdef object _suffix_search
     cdef object _infix_finditer
     cdef object _rules
-    cdef object _special_matcher
+    cdef PhraseMatcher _special_matcher
     cdef int _property_init_count
     cdef int _property_init_max
 
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index dc1ee98c1..8433f2ca5 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -22,7 +22,6 @@ from . import util
 from .attrs import intify_attrs
 from .lexeme cimport EMPTY_LEXEME
-from .matcher import PhraseMatcher
 from .symbols import ORTH
 
 
 cdef class Tokenizer:
@@ -242,10 +241,12 @@ cdef class Tokenizer:
         cdef int orig_final_spacy
         cdef int orig_idx
         cdef Pool mem = Pool()
-        spans = [doc[match[1]:match[2]] for match in self._special_matcher(doc)]
+        cdef vector[MatchStruct] c_matches
+        self._special_matcher.find_matches(doc, &c_matches)
         # Skip processing if no matches
-        if len(spans) == 0:
+        if c_matches.size() == 0:
             return True
+        spans = [doc[match.start:match.end] for match in c_matches]
         spans = util.filter_spans(spans)
         # Put span info in span.start-indexed dict and calculate maximum
         # intermediate document size
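
Illustrative note (not part of the patch): the old code path called
self._special_matcher(doc), i.e. PhraseMatcher.__call__, which builds a
Python list of (match_id, start, end) tuples for every document. The pyx
hunk above instead fills a C++ vector through the cdef-level find_matches
method, so no per-match Python objects are allocated before the early
return. A minimal Cython sketch of that pattern, assuming the
find_matches(doc, &matches) signature shown in the hunk and that
MatchStruct exposes start/end token offsets (as the
doc[match.start:match.end] slice implies); the helper name is hypothetical:

    from libcpp.vector cimport vector
    from .tokens.doc cimport Doc
    from .matcher.phrasematcher cimport PhraseMatcher, MatchStruct

    cdef list get_special_spans(PhraseMatcher matcher, Doc doc):
        # Matches land in a stack-allocated C++ vector, filled in place
        # via the pointer, instead of a Python list of tuples.
        cdef vector[MatchStruct] c_matches
        matcher.find_matches(doc, &c_matches)
        # Cheap C-level emptiness check before any Python work happens.
        if c_matches.size() == 0:
            return []
        # Python Span objects are only created once matches are known to exist.
        return [doc[m.start:m.end] for m in c_matches]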