diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py
index 57771cca4..2c0fc9cf7 100644
--- a/spacy/lang/tokenizer_exceptions.py
+++ b/spacy/lang/tokenizer_exceptions.py
@@ -3,14 +3,18 @@ from __future__ import unicode_literals
 
 import re
 
+from .char_classes import ALPHA_LOWER
 from ..symbols import ORTH, POS, TAG, LEMMA, SPACE
 
 
 # URL validation regex courtesy of: https://mathiasbynens.be/demo/url-regex
-# A few minor mods to this regex to account for use cases represented in test_urls
+# and https://gist.github.com/dperini/729294 (Diego Perini, MIT License)
+# A few mods to this regex to account for use cases represented in test_urls
 URL_PATTERN = (
+    # fmt: off
     r"^"
-    # protocol identifier (see: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml)
+    # protocol identifier (mods: make optional and expand schemes)
+    # (see: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml)
     r"(?:(?:[\w\+\-\.]{2,})://)?"
     # mailto:user or user:pass authentication
     r"(?:\S+(?::\S*)?@)?"
@@ -31,18 +35,27 @@ URL_PATTERN = (
     r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}"
     r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
     r"|"
-    # host name
-    r"(?:(?:[a-z0-9\-]*)?[a-z0-9]+)"
-    # domain name
-    r"(?:\.(?:[a-z0-9])(?:[a-z0-9\-])*[a-z0-9])?"
+    # host & domain names
+    # mods: match is case-sensitive, so include [A-Z]
+    "(?:"
+        "(?:"
+            "[A-Za-z0-9\u00a1-\uffff]"
+            "[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
+        ")?"
+        "[A-Za-z0-9\u00a1-\uffff]\."
+    ")+"
     # TLD identifier
-    r"(?:\.(?:[a-z]{2,}))"
+    # mods: use ALPHA_LOWER instead of a wider range so that this doesn't match
+    # strings like "lower.Upper", which can be split on "." by infixes in some
+    # languages
+    r"(?:[" + ALPHA_LOWER + "]{2,63})"
     r")"
     # port number
     r"(?::\d{2,5})?"
     # resource path
     r"(?:[/?#]\S*)?"
     r"$"
+    # fmt: on
 ).strip()
 
 TOKEN_MATCH = re.compile(URL_PATTERN, re.UNICODE).match
diff --git a/spacy/tests/tokenizer/test_urls.py b/spacy/tests/tokenizer/test_urls.py
index 21e1819b7..ef99484ee 100644
--- a/spacy/tests/tokenizer/test_urls.py
+++ b/spacy/tests/tokenizer/test_urls.py
@@ -20,6 +20,7 @@ URLS_FULL = URLS_BASIC + [
 # URL SHOULD_MATCH and SHOULD_NOT_MATCH patterns courtesy of https://mathiasbynens.be/demo/url-regex
 URLS_SHOULD_MATCH = [
     "http://foo.com/blah_blah",
+    "http://BlahBlah.com/Blah_Blah",
     "http://foo.com/blah_blah/",
     "http://www.example.com/wpstyle/?p=364",
     "https://www.example.com/foo/?bar=baz&inga=42&quux",
@@ -57,14 +58,17 @@ URLS_SHOULD_MATCH = [
     ),
     "http://foo.com/blah_blah_(wikipedia)",
     "http://foo.com/blah_blah_(wikipedia)_(again)",
-    pytest.param("http://⌘.ws", marks=pytest.mark.xfail()),
-    pytest.param("http://⌘.ws/", marks=pytest.mark.xfail()),
-    pytest.param("http://☺.damowmow.com/", marks=pytest.mark.xfail()),
-    pytest.param("http://✪df.ws/123", marks=pytest.mark.xfail()),
-    pytest.param("http://➡.ws/䨹", marks=pytest.mark.xfail()),
-    pytest.param("http://مثال.إختبار", marks=pytest.mark.xfail()),
+    "http://www.foo.co.uk",
+    "http://www.foo.co.uk/",
+    "http://www.foo.co.uk/blah/blah",
+    "http://⌘.ws",
+    "http://⌘.ws/",
+    "http://☺.damowmow.com/",
+    "http://✪df.ws/123",
+    "http://➡.ws/䨹",
+    "http://مثال.إختبار",
     pytest.param("http://例子.测试", marks=pytest.mark.xfail()),
-    pytest.param("http://उदाहरण.परीक्षा", marks=pytest.mark.xfail()),
+    "http://उदाहरण.परीक्षा",
 ]
 
 URLS_SHOULD_NOT_MATCH = [
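
Not part of the diff, but as a quick sanity check of the intended behavior (a sketch assuming a checkout with this patch installed), the exported TOKEN_MATCH should behave like this:

    from spacy.lang.tokenizer_exceptions import TOKEN_MATCH

    # Mixed-case hosts/paths and non-ASCII hosts now match as single URL tokens,
    # mirroring the new and un-xfailed cases in URLS_SHOULD_MATCH:
    assert TOKEN_MATCH("http://BlahBlah.com/Blah_Blah")
    assert TOKEN_MATCH("http://✪df.ws/123")

    # The TLD is restricted to ALPHA_LOWER, so "lower.Upper" is not swallowed
    # as a URL and can still be split on "." by infix rules:
    assert TOKEN_MATCH("lower.Upper") is None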