diff --git a/spacy/lang/hu/punctuation.py b/spacy/lang/hu/punctuation.py
index bc043486f..a010bb7ae 100644
--- a/spacy/lang/hu/punctuation.py
+++ b/spacy/lang/hu/punctuation.py
@@ -10,7 +10,6 @@ _concat_icons = CONCAT_ICONS.replace("\u00B0", "")
 
 _currency = r"\$¢£€¥฿"
 _quotes = CONCAT_QUOTES.replace("'", "")
-_units = UNITS.replace("%", "")
 
 _prefixes = (
     LIST_PUNCT
@@ -21,7 +20,8 @@ _prefixes = (
 )
 
 _suffixes = (
-    LIST_PUNCT
+    [r"\+"]
+    + LIST_PUNCT
     + LIST_ELLIPSES
     + LIST_QUOTES
     + [_concat_icons]
@@ -29,7 +29,7 @@ _suffixes = (
         r"(?<=[0-9])\+",
         r"(?<=°[FfCcKk])\.",
         r"(?<=[0-9])(?:[{c}])".format(c=_currency),
-        r"(?<=[0-9])(?:{u})".format(u=_units),
+        r"(?<=[0-9])(?:{u})".format(u=UNITS),
         r"(?<=[{al}{e}{q}(?:{c})])\.".format(
             al=ALPHA_LOWER, e=r"%²\-\+", q=CONCAT_QUOTES, c=_currency
         ),
diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py
index 2c0fc9cf7..42dbc7bac 100644
--- a/spacy/lang/tokenizer_exceptions.py
+++ b/spacy/lang/tokenizer_exceptions.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .char_classes import ALPHA_LOWER
+from .char_classes import ALPHA_LOWER, ALPHA
 from ..symbols import ORTH, POS, TAG, LEMMA, SPACE
 
 
@@ -13,6 +13,8 @@ from ..symbols import ORTH, POS, TAG, LEMMA, SPACE
 URL_PATTERN = (
     # fmt: off
     r"^"
+    # in order to support prefix tokenization (see prefix test cases in test_urls)
+    r"(?=[" + ALPHA + r"\w])"
     # protocol identifier (mods: make optional and expand schemes)
     # (see: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml)
     r"(?:(?:[\w\+\-\.]{2,})://)?"
@@ -54,6 +56,8 @@ URL_PATTERN = (
     r"(?::\d{2,5})?"
     # resource path
     r"(?:[/?#]\S*)?"
+    # in order to support suffix tokenization (see suffix test cases in test_urls)
+    r"(?<=[" + ALPHA + r"\w/])"
     r"$"
     # fmt: on
).strip()
diff --git a/spacy/tests/tokenizer/test_urls.py b/spacy/tests/tokenizer/test_urls.py
index 58e9d73f3..2d82e213c 100644
--- a/spacy/tests/tokenizer/test_urls.py
+++ b/spacy/tests/tokenizer/test_urls.py
@@ -56,8 +56,12 @@ URLS_SHOULD_MATCH = [
     pytest.param(
         "chrome-extension://mhjfbmdgcfjbbpaeojofohoefgiehjai", marks=pytest.mark.xfail()
     ),
-    "http://foo.com/blah_blah_(wikipedia)",
-    "http://foo.com/blah_blah_(wikipedia)_(again)",
+    pytest.param(
+        "http://foo.com/blah_blah_(wikipedia)", marks=pytest.mark.xfail()
+    ),
+    pytest.param(
+        "http://foo.com/blah_blah_(wikipedia)_(again)", marks=pytest.mark.xfail()
+    ),
     "http://www.foo.co.uk",
     "http://www.foo.co.uk/",
     "http://www.foo.co.uk/blah/blah",
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 4da081259..6f7e44061 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -239,6 +239,8 @@ cdef class Tokenizer:
         cdef unicode minus_suf
         cdef size_t last_size = 0
         while string and len(string) != last_size:
+            if self.token_match and self.token_match(string):
+                break
             if self._specials.get(hash_string(string)) != NULL:
                 has_special[0] = 1
                 break
@@ -455,6 +457,10 @@ cdef class Tokenizer:
             suffixes = []
             while substring:
                 while prefix_search(substring) or suffix_search(substring):
+                    if token_match(substring):
+                        tokens.append(("TOKEN_MATCH", substring))
+                        substring = ''
+                        break
                     if substring in special_cases:
                         tokens.extend(("SPECIAL-" + str(i + 1), self.vocab.strings[e[ORTH]]) for i, e in enumerate(special_cases[substring]))
                         substring = ''
@@ -475,12 +481,12 @@ cdef class Tokenizer:
                             break
                     suffixes.append(("SUFFIX", substring[split:]))
                     substring = substring[:split]
-                if substring in special_cases:
-                    tokens.extend(("SPECIAL-" + str(i + 1), self.vocab.strings[e[ORTH]]) for i, e in enumerate(special_cases[substring]))
-                    substring = ''
-                elif token_match(substring):
+                if token_match(substring):
                     tokens.append(("TOKEN_MATCH", substring))
                     substring = ''
+                elif substring in special_cases:
+                    tokens.extend(("SPECIAL-" + str(i + 1), self.vocab.strings[e[ORTH]]) for i, e in enumerate(special_cases[substring]))
+                    substring = ''
                 elif list(infix_finditer(substring)):
                     infixes = infix_finditer(substring)
                     offset = 0
diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md
index 685619c88..60a6699a9 100644
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@@ -740,6 +740,10 @@ def tokenizer_pseudo_code(self, special_cases, prefix_search, suffix_search,
         suffixes = []
         while substring:
             while prefix_search(substring) or suffix_search(substring):
+                if token_match(substring):
+                    tokens.append(substring)
+                    substring = ''
+                    break
                 if substring in special_cases:
                     tokens.extend(special_cases[substring])
                     substring = ''
@@ -754,12 +758,12 @@ def tokenizer_pseudo_code(self, special_cases, prefix_search, suffix_search,
                 split = suffix_search(substring).start()
                 suffixes.append(substring[split:])
                 substring = substring[:split]
-            if substring in special_cases:
-                tokens.extend(special_cases[substring])
-                substring = ''
-            elif token_match(substring):
+            if token_match(substring):
                 tokens.append(substring)
                 substring = ''
+            elif substring in special_cases:
+                tokens.extend(special_cases[substring])
+                substring = ''
             elif list(infix_finditer(substring)):
                 infixes = infix_finditer(substring)
                 offset = 0
@@ -780,14 +784,14 @@ def tokenizer_pseudo_code(self, special_cases, prefix_search, suffix_search,
 The algorithm can be summarized as follows:
 
 1. Iterate over whitespace-separated substrings.
-2. Check whether we have an explicitly defined rule for this substring. If we
+2. Look for a token match. If there is a match, stop processing and keep this token.
+3. Check whether we have an explicitly defined rule for this substring. If we
    do, use it.
-3. Otherwise, try to consume one prefix. If we consumed a prefix, go back to #2,
-   so that special cases always get priority.
-4. If we didn't consume a prefix, try to consume a suffix and then go back to
+4. Otherwise, try to consume one prefix. If we consumed a prefix, go back to #2,
+   so that the token match and special cases always get priority.
+5. If we didn't consume a prefix, try to consume a suffix and then go back to
    #2.
-5. If we can't consume a prefix or a suffix, look for a special case.
-6. Next, look for a token match.
+6. If we can't consume a prefix or a suffix, look for a special case.
 7. Look for "infixes" — stuff like hyphens etc. and split the substring into
    tokens on all infixes.
 8. Once we can't consume any more of the string, handle it as a single token.
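
---

Reviewer notes (illustrations only, not part of the patch):

The core behavioral change is the priority reordering: `token_match` now wins over special cases and over prefix/suffix trimming, both inside the affix loop and after it. Below is a minimal, self-contained sketch of the reordered algorithm from the `linguistic-features.md` pseudocode above. The prefix, suffix, and URL regexes are toy stand-ins rather than spaCy's real definitions, and infix splitting is omitted for brevity.

```python
import re

# Toy stand-ins for spaCy's real prefix/suffix/URL rules; assumptions for
# illustration, not the library's actual definitions.
prefix_search = re.compile(r"""^[\[\("']""").search
suffix_search = re.compile(r"""[\]\)"'!,.]$""").search
token_match = re.compile(r"^https?://\S+[\w/]$").match


def tokenize(text, special_cases=None):
    special_cases = special_cases or {}
    tokens = []
    for substring in text.split():
        suffixes = []
        while substring:
            while prefix_search(substring) or suffix_search(substring):
                # New in this patch: a full token match wins before any
                # special case is applied or any affix is trimmed off.
                if token_match(substring):
                    tokens.append(substring)
                    substring = ""
                    break
                if substring in special_cases:
                    tokens.extend(special_cases[substring])
                    substring = ""
                    break
                if prefix_search(substring):
                    split = prefix_search(substring).end()
                    tokens.append(substring[:split])
                    substring = substring[split:]
                    continue
                if suffix_search(substring):
                    split = suffix_search(substring).start()
                    suffixes.append(substring[split:])
                    substring = substring[:split]
            # Same reordering after the affix loop: token match first, then
            # special cases (infix splitting omitted for brevity).
            if token_match(substring):
                tokens.append(substring)
            elif substring in special_cases:
                tokens.extend(special_cases[substring])
            elif substring:
                tokens.append(substring)
            substring = ""
        tokens.extend(reversed(suffixes))
    return tokens


print(tokenize("Visit (https://example.com)!"))
# ['Visit', '(', 'https://example.com', ')', '!']
```

The opening paren and trailing punctuation are peeled off one at a time, and as soon as the remainder matches the URL pattern, it is emitted whole instead of being split further.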
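The two `URL_PATTERN` additions are zero-width guards: a lookahead so the pattern only matches a string that starts on a word character (letting prefix punctuation be split off first), and a lookbehind so it only matches a string that ends on a word character or slash (so trailing punctuation keeps being treated as a suffix). A cut-down illustration with a toy pattern; the real `URL_PATTERN` is far stricter in between, and its `ALPHA` class covers Unicode letters, which `\w` here only approximates:

```python
import re

# Toy pattern showing just the two zero-width guards added in this diff.
toy_url = re.compile(
    r"^"
    r"(?=\w)"        # new: must *start* on a word character
    r"(?:\w+://)?"   # optional scheme
    r"\S+"
    r"(?<=[\w/])"    # new: must *end* on a word character or "/"
    r"$"
)

print(bool(toy_url.match("https://example.com/path")))  # True
print(bool(toy_url.match("https://example.com)")))      # False: ")" fails the lookbehind
print(bool(toy_url.match("(https://example.com")))      # False: "(" fails the lookahead
```

Because both guards are zero-width, they change only where a match may start and end, not what it consumes, so the tokenizer can keep trimming affixes until the remainder is a clean URL.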
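End to end, this is the behavior the patch is after, as exercised by `test_urls.py`. The output shown is the expected post-patch tokenization, not something every spaCy version will reproduce:

```python
from spacy.lang.en import English

nlp = English()  # tokenizer only, no pipeline components
print([t.text for t in nlp("Visit (https://example.com)!")])
# expected with this patch: ['Visit', '(', 'https://example.com', ')', '!']
```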