diff --git a/spacy/tests/test_orth.py b/spacy/tests/test_orth.py
new file mode 100644
index 000000000..c8dcbf209
--- /dev/null
+++ b/spacy/tests/test_orth.py
@@ -0,0 +1,87 @@
+# coding: utf-8
+"""Parametrized tests for the helper functions imported from ``..orth``."""
+from __future__ import unicode_literals
+
+from ..orth import is_alpha, is_digit, is_punct, is_space, is_ascii, is_upper
+from ..orth import is_lower, is_title, like_url, like_number, word_shape
+
+import pytest
+
+
+# TODO: brackets, is_ascii, is_upper, is_lower, is_title
+
+
+@pytest.mark.parametrize('text,match', [
+    ('1997', False), ('19.97', False), ('hello9', False), ('Hello', True),
+    ('HELLO', True), ('Hello9', False), ('\n', False), ('!', False),
+    ('!d', False), ('\nd', False)])
+def test_orth_is_alpha(text, match):
+    """Check is_alpha against each (text, expected truthiness) pair."""
+    if match:
+        assert is_alpha(text)
+    else:
+        assert not is_alpha(text)
+
+
+@pytest.mark.parametrize('text,match', [
+    ('1997', True), ('0000000', True), ('19.97', False), ('hello9', False),
+    ('Hello', False), ('\n', False), ('!', False), ('!0', False),
+    ('\n5', False)])
+def test_orth_is_digit(text, match):
+    """Check is_digit against each (text, expected truthiness) pair."""
+    if match:
+        assert is_digit(text)
+    else:
+        assert not is_digit(text)
+
+
+@pytest.mark.parametrize('text,match', [(',', True), (' ', False), ('a', False)])
+def test_orth_is_punct(text, match):
+    """Check is_punct against each (text, expected truthiness) pair."""
+    if match:
+        assert is_punct(text)
+    else:
+        assert not is_punct(text)
+
+
+@pytest.mark.parametrize('text,match', [(',', False), (' ', True), ('a', False)])
+def test_orth_is_space(text, match):
+    """Check is_space against each (text, expected truthiness) pair."""
+    if match:
+        assert is_space(text)
+    else:
+        assert not is_space(text)
+
+
+@pytest.mark.parametrize('text,match', [
+    ('www.google.com', True), ('google.com', True), ('sydney.com', True),
+    ('Sydney.edu', True), ('2girls1cup.org', True), ('http://stupid', True),
+    ('www.hi', True), ('dog', False), ('1.2', False), ('1.a', False),
+    ('hello.There', False)])
+def test_orth_like_url(text, match):
+    """Check like_url against each (text, expected truthiness) pair."""
+    if match:
+        assert like_url(text)
+    else:
+        assert not like_url(text)
+
+
+@pytest.mark.parametrize('text,match', [
+    ('10', True), ('1', True), ('10,000', True), ('10,00', True),
+    (',10', True), ('999.0', True), ('.99', True), ('one', True),
+    ('two', True), ('billion', True), ('dog', False), (',', False),
+    ('1/2', True), ('1/2/3', False)])
+def test_orth_like_number(text, match):
+    """Check like_number against each (text, expected truthiness) pair."""
+    if match:
+        assert like_number(text)
+    else:
+        assert not like_number(text)
+
+
+@pytest.mark.parametrize('text,shape', [
+    ('Nasa', 'Xxxx'), ('capitalized', 'xxxx'), ('999999999', 'dddd'),
+    ('C3P0', 'XdXd'), (',', ','), ('\n', '\n'), ('``,-', '``,-')])
+def test_orth_word_shape(text, shape):
+    """Check that word_shape(text) equals the expected shape string."""
+    assert word_shape(text) == shape
diff --git a/spacy/tests/vocab/test_flag_features.py b/spacy/tests/vocab/test_flag_features.py
deleted file mode 100644
index 880704e28..000000000
--- a/spacy/tests/vocab/test_flag_features.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from __future__ import unicode_literals
-import pytest
-
-from spacy.orth import is_alpha
-from spacy.orth import is_digit
-from spacy.orth import is_punct
-from spacy.orth import is_space
-from spacy.orth import is_ascii
-from spacy.orth import is_upper
-from spacy.orth import is_lower
-from spacy.orth import is_title
-
-
-@pytest.fixture
-def words():
-    return ["1997", "19.97", "hello9", "Hello", "HELLO", "Hello9", "\n", "!",
-            "!d", "\nd"]
-
-
-def test_is_alpha(words):
-    assert not is_alpha(words[0])
-    assert not is_alpha(words[1])
-    assert not is_alpha(words[2])
-    assert is_alpha(words[3])
-    assert is_alpha(words[4])
-    assert not is_alpha(words[5])
-    assert not is_alpha(words[6])
-    assert not is_alpha(words[7])
-    assert not is_alpha(words[8])
-    assert not is_alpha(words[9])
-
-
-def test_is_digit(words):
-    assert is_digit(words[0])
-    assert not is_digit(words[1])
-    assert not is_digit(words[2])
-    assert not is_digit(words[3])
-    assert not is_digit(words[4])
-    assert not is_digit(words[5])
-    assert not is_digit(words[6])
-    assert not is_digit(words[7])
-    assert not is_digit(words[8])
-    assert not is_digit(words[9])
-
-
-def test_is_quote(words):
-    pass
-
-
-def test_is_bracket(words):
-    pass
-
-
-def test_is_left_bracket(words):
-    pass
-
-def test_is_right_bracket(words):
-    pass
diff --git a/spacy/tests/vocab/test_is_punct.py b/spacy/tests/vocab/test_is_punct.py
deleted file mode 100644
index 242e31212..000000000
--- a/spacy/tests/vocab/test_is_punct.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from __future__ import unicode_literals
-
-
-from spacy.orth import is_punct
-
-
-def test_comma():
-    assert is_punct(',')
-
-
-def test_space():
-    assert not is_punct(' ')
-
-
-def test_letter():
-    assert not is_punct('a')
diff --git a/spacy/tests/vocab/test_number.py b/spacy/tests/vocab/test_number.py
deleted file mode 100644
index 2ca840a06..000000000
--- a/spacy/tests/vocab/test_number.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from __future__ import unicode_literals
-
-from spacy.orth import like_number
-
-
-def test_digits():
-    assert like_number('10')
-    assert like_number('1')
-
-
-def test_comma():
-    assert like_number('10,000')
-    assert like_number('10,00')
-    assert like_number(',10')
-
-
-def test_period():
-    assert like_number('999.0')
-    assert like_number('.99')
-
-
-def test_fraction():
-    assert like_number('1/2')
-    assert not like_number('1/2/3')
-
-
-def test_word():
-    assert like_number('one')
-    assert like_number('two')
-    assert like_number('billion')
-
-
-def test_not_number():
-    assert not like_number('dog')
-    assert not like_number(',')
diff --git a/spacy/tests/vocab/test_shape.py b/spacy/tests/vocab/test_shape.py
deleted file mode 100644
index 0568feb6a..000000000
--- a/spacy/tests/vocab/test_shape.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from __future__ import unicode_literals
-
-import pytest
-
-from spacy.orth import word_shape as ws
-
-
-def test_capitalized():
-    assert ws('Nasa') == 'Xxxx'
-
-
-def test_truncate():
-    assert ws('capitalized') == 'xxxx'
-
-
-def test_digits():
-    assert ws('999999999') == 'dddd'
-
-
-def test_mix():
-    assert ws('C3P0') == 'XdXd'
-
-
-def test_punct():
-    assert ws(',') == ','
-
-
-def test_space():
-    assert ws('\n') == '\n'
-
-
-def test_punct_seq():
-    assert ws('``,-') == '``,-'
diff --git a/spacy/tests/vocab/test_urlish.py b/spacy/tests/vocab/test_urlish.py
deleted file mode 100644
index 3faa40c5e..000000000
--- a/spacy/tests/vocab/test_urlish.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from __future__ import unicode_literals
-
-from spacy.orth import like_url
-
-
-def test_basic_url():
-    assert like_url('www.google.com')
-    assert like_url('google.com')
-    assert like_url('sydney.com')
-    assert like_url('Sydney.edu')
-    assert like_url('2girls1cup.org')
-
-
-def test_close_enough():
-    assert like_url('http://stupid')
-    assert like_url('www.hi')
-
-
-def test_non_match():
-    assert not like_url('dog')
-    assert not like_url('1.2')
-    assert not like_url('1.a')
-    assert not like_url('hello.There')