diff --git a/spacy/fr/tokenizer_exceptions.py b/spacy/fr/tokenizer_exceptions.py index 43806e270..eef7d789d 100644 --- a/spacy/fr/tokenizer_exceptions.py +++ b/spacy/fr/tokenizer_exceptions.py @@ -13,7 +13,7 @@ from ..symbols import * import os import io -import re +import regex as re def get_exceptions(): diff --git a/spacy/hu/tokenizer_exceptions.py b/spacy/hu/tokenizer_exceptions.py index 85cf72ec9..a6dc47511 100644 --- a/spacy/hu/tokenizer_exceptions.py +++ b/spacy/hu/tokenizer_exceptions.py @@ -1,7 +1,7 @@ # coding: utf8 from __future__ import unicode_literals -import re +import regex as re from spacy.language_data.punctuation import ALPHA_LOWER, CURRENCY from ..language_data.tokenizer_exceptions import _URL_PATTERN diff --git a/spacy/language_data/punctuation.py b/spacy/language_data/punctuation.py index f94d91e80..4bb31c340 100644 --- a/spacy/language_data/punctuation.py +++ b/spacy/language_data/punctuation.py @@ -16,6 +16,8 @@ A Ä À Á  Ǎ Æ Ã Å Ā Ă Ą B C Ç Ć Č Ĉ Ċ C̄ D Ð Ď E É È Ê Ë Î Ï Í Ī Ì Ȉ Ǐ Į Ĩ J K Ķ L Ł Ļ M N Ñ Ń Ň Ņ O Ö Ó Ò Ő Ô Õ Œ Ø Ō Ő Ǒ Ơ P Q R Ř Ŗ S Ś Š Ş Ŝ T Ť U Ú Û Ù Ú Ū Ű Ǔ Ů Ų Ư V W Ŵ X Y Ÿ Ý Ỳ Ŷ Ỹ Z Ź Ž Ż Þ """ +import regex as re +re.DEFAULT_VERSION = re.VERSION1 _UNITS = """ diff --git a/spacy/util.py b/spacy/util.py index 0ccdfbd72..0c7136522 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals, print_function import ujson -import re +import regex as re from pathlib import Path import sys import textwrap