From be7a7aeb1a403a191e125370ef12ebc5a3304f0e Mon Sep 17 00:00:00 2001 From: Gyorgy Orosz Date: Sat, 14 Jan 2017 15:56:41 +0100 Subject: [PATCH] Reversed accidental changes. --- spacy/hu/tokenizer_exceptions.py | 4 ++-- spacy/language_data/tokenizer_exceptions.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/spacy/hu/tokenizer_exceptions.py b/spacy/hu/tokenizer_exceptions.py index 24ef669e2..6429fcf14 100644 --- a/spacy/hu/tokenizer_exceptions.py +++ b/spacy/hu/tokenizer_exceptions.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re from spacy.language_data.punctuation import ALPHA_LOWER, CURRENCY -from ..language_data.tokenizer_exceptions import URL_PATTERN +from ..language_data.tokenizer_exceptions import _URL_PATTERN ABBREVIATIONS = """ AkH. @@ -548,4 +548,4 @@ NUMS = "(({ne})|({t})|({on})|({c}))({s})?".format( c=CURRENCY, s=_SUFFIES ) -TOKEN_MATCH = re.compile("^({u})|({n})$".format(u=URL_PATTERN, n=NUMS)).match +TOKEN_MATCH = re.compile("^({u})|({n})$".format(u=_URL_PATTERN, n=NUMS)).match diff --git a/spacy/language_data/tokenizer_exceptions.py b/spacy/language_data/tokenizer_exceptions.py index 33a2417d1..6551440f2 100644 --- a/spacy/language_data/tokenizer_exceptions.py +++ b/spacy/language_data/tokenizer_exceptions.py @@ -2,10 +2,10 @@ from __future__ import unicode_literals import re -URL_PATTERN = r''' -((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w\-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?) +_URL_PATTERN = r''' +^((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=\+\$,\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\+\$,\w]+@)[A-Za-z0-9.-]+)((?:\/[\+~%\/.\w\-_]*)?\??(?:[-\+=&;%@.\w_]*)#?(?:[\w]*))?)$ '''.strip() -TOKEN_MATCH = re.compile("^{}$".format(URL_PATTERN)).match +TOKEN_MATCH = re.compile(_URL_PATTERN).match __all__ = ['TOKEN_MATCH']