diff --git a/spacy/tests/tokenizer/test_urls.py b/spacy/tests/tokenizer/test_urls.py
index f4f9ef29e..316b25f12 100644
--- a/spacy/tests/tokenizer/test_urls.py
+++ b/spacy/tests/tokenizer/test_urls.py
@@ -21,11 +21,8 @@ URLS_FULL = URLS_BASIC + [
 URLS_SHOULD_MATCH = [
     "http://foo.com/blah_blah",
     "http://foo.com/blah_blah/",
-#    "http://foo.com/blah_blah_(wikipedia)",
-#    "http://foo.com/blah_blah_(wikipedia)_(again)",
     "http://www.example.com/wpstyle/?p=364",
     "https://www.example.com/foo/?bar=baz&inga=42&quux",
-    "http://✪df.ws/123",
     "http://userid:password@example.com:8080",
     "http://userid:password@example.com:8080/",
     "http://userid@example.com",
@@ -36,7 +33,6 @@ URLS_SHOULD_MATCH = [
     "http://userid:password@example.com/",
     "http://142.42.1.1/",
     "http://142.42.1.1:8080/",
-    "http://➡.ws/䨹",
     "http://⌘.ws",
     "http://⌘.ws/",
     "http://foo.com/blah_(wikipedia)#cite-1",
@@ -48,13 +44,20 @@ URLS_SHOULD_MATCH = [
     "http://j.mp",
     "ftp://foo.bar/baz",
     "http://foo.bar/?q=Test%20URL-encoded%20stuff",
-    "http://مثال.إختبار",
-    "http://例子.测试",
-#    "http://उदाहरण.परीक्षा",
     "http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com",
     "http://1337.net",
     "http://a.b-c.de",
     "http://223.255.255.254",
+    "http://a.b--c.de/",  # this is a legit domain name see: https://gist.github.com/dperini/729294 comment on 9/9/2014
+    "http://✪df.ws/123",
+    "http://➡.ws/䨹",
+    "http://مثال.إختبار",
+    "http://例子.测试",
+
+    # Entries of this list that are marked xfail (expected to fail).
+    pytest.param("http://उदाहरण.परीक्षा", marks=pytest.mark.xfail()),
+    pytest.param("http://foo.com/blah_blah_(wikipedia)", marks=pytest.mark.xfail()),
+    pytest.param("http://foo.com/blah_blah_(wikipedia)_(again)", marks=pytest.mark.xfail()),
 ]
 
 URLS_SHOULD_NOT_MATCH = [
@@ -74,7 +77,6 @@ URLS_SHOULD_NOT_MATCH = [
     "///a",
     "///",
     "http:///a",
-#    "foo.com",
     "rdar://1234",
     "h://test",
     "http:// shouldfail.com",
@@ -82,21 +84,23 @@ URLS_SHOULD_NOT_MATCH = [
     "http://foo.bar/foo(bar)baz quux",
     "ftps://foo.bar/",
     "http://-error-.invalid/",
-#    "http://a.b--c.de/", (this is a legit domain name see: https://gist.github.com/dperini/729294 comment on 9/9/2014
     "http://-a.b.co",
     "http://a.b-.co",
     "http://0.0.0.0",
     "http://10.1.1.0",
     "http://10.1.1.255",
     "http://224.1.1.1",
-#    "http://1.1.1.1.1",
     "http://123.123.123",
     "http://3628126748",
     "http://.www.foo.bar/",
-#    "http://www.foo.bar./",
     "http://.www.foo.bar./",
     "http://10.1.1.1",
-    "NASDAQ:GOOG"
+    "NASDAQ:GOOG",
+
+    # Entries of this list that are marked xfail (expected to fail).
+    pytest.param("foo.com", marks=pytest.mark.xfail()),
+    pytest.param("http://1.1.1.1.1", marks=pytest.mark.xfail()),
+    pytest.param("http://www.foo.bar./", marks=pytest.mark.xfail()),
 ]
 
 