diff --git a/spacy/tests/en/__init__.py b/spacy/tests/en/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/spacy/tests/tokenizer/conftest.py b/spacy/tests/en/conftest.py
similarity index 100%
rename from spacy/tests/tokenizer/conftest.py
rename to spacy/tests/en/conftest.py
diff --git a/spacy/tests/en/tokenizer/__init__.py b/spacy/tests/en/tokenizer/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/spacy/tests/tokenizer/sun.txt b/spacy/tests/en/tokenizer/sun.txt
similarity index 100%
rename from spacy/tests/tokenizer/sun.txt
rename to spacy/tests/en/tokenizer/sun.txt
diff --git a/spacy/tests/tokenizer/test_contractions.py b/spacy/tests/en/tokenizer/test_contractions.py
similarity index 100%
rename from spacy/tests/tokenizer/test_contractions.py
rename to spacy/tests/en/tokenizer/test_contractions.py
diff --git a/spacy/tests/tokenizer/test_exceptions.py b/spacy/tests/en/tokenizer/test_exceptions.py
similarity index 100%
rename from spacy/tests/tokenizer/test_exceptions.py
rename to spacy/tests/en/tokenizer/test_exceptions.py
diff --git a/spacy/tests/tokenizer/test_indices.py b/spacy/tests/en/tokenizer/test_indices.py
similarity index 100%
rename from spacy/tests/tokenizer/test_indices.py
rename to spacy/tests/en/tokenizer/test_indices.py
diff --git a/spacy/tests/tokenizer/test_prefix_suffix_infix.py b/spacy/tests/en/tokenizer/test_prefix_suffix_infix.py
similarity index 100%
rename from spacy/tests/tokenizer/test_prefix_suffix_infix.py
rename to spacy/tests/en/tokenizer/test_prefix_suffix_infix.py
diff --git a/spacy/tests/tokenizer/test_punct.py b/spacy/tests/en/tokenizer/test_punct.py
similarity index 96%
rename from spacy/tests/tokenizer/test_punct.py
rename to spacy/tests/en/tokenizer/test_punct.py
index f6e8a0293..b6ae9224d 100644
--- a/spacy/tests/tokenizer/test_punct.py
+++ b/spacy/tests/en/tokenizer/test_punct.py
@@ -6,12 +6,13 @@ from __future__ import unicode_literals
 
 import pytest
 
-from ... import util
-from ...language_data import TOKENIZER_PREFIXES
+from ....util import compile_prefix_regex
+from ....language_data import TOKENIZER_PREFIXES
 
-en_search_prefixes = util.compile_prefix_regex(TOKENIZER_PREFIXES).search
+en_search_prefixes = compile_prefix_regex(TOKENIZER_PREFIXES).search
+
 
 
 PUNCT_OPEN = ['(', '[', '{', '*']
 PUNCT_CLOSE = [')', ']', '}', '*']
 PUNCT_PAIRED = [('(', ')'), ('[', ']'), ('{', '}'), ('*', '*')]
diff --git a/spacy/tests/tokenizer/test_tokenizer.py b/spacy/tests/en/tokenizer/test_tokenizer.py
similarity index 99%
rename from spacy/tests/tokenizer/test_tokenizer.py
rename to spacy/tests/en/tokenizer/test_tokenizer.py
index 31bff9b14..8b34c5ec2 100644
--- a/spacy/tests/tokenizer/test_tokenizer.py
+++ b/spacy/tests/en/tokenizer/test_tokenizer.py
@@ -4,7 +4,7 @@ from os import path
 
 import pytest
 
-from spacy.util import utf8open
+from ....util import utf8open
 
 
 def test_tokenizer_handles_no_word(en_tokenizer):
diff --git a/spacy/tests/tokenizer/test_whitespace.py b/spacy/tests/en/tokenizer/test_whitespace.py
similarity index 100%
rename from spacy/tests/tokenizer/test_whitespace.py
rename to spacy/tests/en/tokenizer/test_whitespace.py