Merge flag features tests into orth tests in tests root

This commit is contained in:
Ines Montani 2017-01-12 15:12:00 +01:00
parent 03c136cfd3
commit 0da2ee5c68
6 changed files with 77 additions and 165 deletions

77
spacy/tests/test_orth.py Normal file
View File

@ -0,0 +1,77 @@
# coding: utf-8
from __future__ import unicode_literals
from ..orth import is_alpha, is_digit, is_punct, is_space, is_ascii, is_upper
from ..orth import is_lower, is_title, like_url, like_number, word_shape
import pytest
# TODO: brackets, is_ascii, is_upper, is_lower, is_title
@pytest.mark.parametrize('text,match', [
    ('1997', False),
    ('19.97', False),
    ('hello9', False),
    ('Hello', True),
    ('HELLO', True),
    ('Hello9', False),
    ('\n', False),
    ('!', False),
    ('!d', False),
    ('\nd', False),
])
def test_orth_is_alpha(text, match):
    """Check is_alpha against each (text, expected truthiness) pair.

    Only the purely alphabetic samples ('Hello', 'HELLO') are expected to
    match; samples containing digits, punctuation or a newline are not.
    """
    # bool() keeps the comparison truthiness-based, like a bare assert.
    assert bool(is_alpha(text)) == match
@pytest.mark.parametrize('text,match', [
    ('1997', True),
    ('0000000', True),
    ('19.97', False),
    ('hello9', False),
    ('Hello', False),
    ('\n', False),
    ('!', False),
    ('!0', False),
    ('\n5', False),
])
def test_orth_is_digit(text, match):
    """Check is_digit against each (text, expected truthiness) pair.

    Only the all-digit samples are expected to match; a decimal point,
    letters, punctuation or a newline anywhere in the text rules it out.
    """
    # bool() keeps the comparison truthiness-based, like a bare assert.
    assert bool(is_digit(text)) == match
@pytest.mark.parametrize('text,match', [
    (',', True),
    (' ', False),
    ('a', False),
])
def test_orth_is_punct(text, match):
    """Check is_punct: a comma matches, a space and a letter do not."""
    # bool() keeps the comparison truthiness-based, like a bare assert.
    assert bool(is_punct(text)) == match
@pytest.mark.parametrize('text,match', [
    (',', False),
    (' ', True),
    ('a', False),
])
def test_orth_is_space(text, match):
    """Check is_space: a space matches, a comma and a letter do not."""
    # bool() keeps the comparison truthiness-based, like a bare assert.
    assert bool(is_space(text)) == match
@pytest.mark.parametrize('text,match', [
    ('www.google.com', True),
    ('google.com', True),
    ('sydney.com', True),
    # Capitalised host name; keeps the mixed-case positive case covered.
    ('Sydney.edu', True),
    ('2girls1cup.org', True),
    ('http://stupid', True),
    ('www.hi', True),
    ('dog', False),
    ('1.2', False),
    ('1.a', False),
    ('hello.There', False),
])
def test_orth_like_url(text, match):
    """Check like_url against each (text, expected truthiness) pair.

    Positive cases are domain-like strings and strings starting with
    'http://' or 'www.'; negative cases are a plain word, dotted numbers
    and a dotted word pair with a capitalised second part.
    """
    # bool() keeps the comparison truthiness-based, like a bare assert.
    assert bool(like_url(text)) == match
@pytest.mark.parametrize('text,match', [
    ('10', True),
    ('1', True),
    ('10,000', True),
    ('10,00', True),
    (',10', True),
    ('999.0', True),
    # Leading decimal point with no integer part.
    ('.99', True),
    ('one', True),
    ('two', True),
    ('billion', True),
    ('dog', False),
    (',', False),
    ('1/2', True),
    ('1/2/3', False),
])
def test_orth_like_number(text, match):
    """Check like_number against each (text, expected truthiness) pair.

    Positive cases cover plain digits, digits with commas or a decimal
    point, number words and a simple fraction; negative cases are a
    non-number word, a lone comma and a fraction with two slashes.
    """
    # bool() keeps the comparison truthiness-based, like a bare assert.
    assert bool(like_number(text)) == match
@pytest.mark.parametrize('text,shape', [
    ('Nasa', 'Xxxx'),
    ('capitalized', 'xxxx'),
    ('999999999', 'dddd'),
    ('C3P0', 'XdXd'),
    (',', ','),
    ('\n', '\n'),
    ('``,-', '``,-'),
])
def test_orth_word_shape(text, shape):
    """Check that word_shape returns the expected shape string.

    The table pairs each input with its expected shape: long runs of
    lowercase letters or digits are expected as four-character shapes,
    while the punctuation and newline samples are expected unchanged.
    """
    observed = word_shape(text)
    assert observed == shape

View File

@ -1,58 +0,0 @@
from __future__ import unicode_literals
import pytest
from spacy.orth import is_alpha
from spacy.orth import is_digit
from spacy.orth import is_punct
from spacy.orth import is_space
from spacy.orth import is_ascii
from spacy.orth import is_upper
from spacy.orth import is_lower
from spacy.orth import is_title
@pytest.fixture
def words():
    """Provide the sample strings used by the flag-feature tests."""
    samples = [
        "1997",
        "19.97",
        "hello9",
        "Hello",
        "HELLO",
        "Hello9",
        "\n",
        "!",
        "!d",
        "\nd",
    ]
    return samples
def test_is_alpha(words):
    """Check is_alpha on the first ten entries of the words fixture.

    Only the entries at positions 3 and 4 are expected to be alphabetic.
    """
    alphabetic_positions = {3, 4}
    for position in range(10):
        expected = position in alphabetic_positions
        # bool() keeps the comparison truthiness-based, like a bare assert.
        assert bool(is_alpha(words[position])) == expected
def test_is_digit(words):
    """Check is_digit on the first ten entries of the words fixture.

    Only the entry at position 0 is expected to be all digits.
    """
    digit_positions = {0}
    for position in range(10):
        expected = position in digit_positions
        # bool() keeps the comparison truthiness-based, like a bare assert.
        assert bool(is_digit(words[position])) == expected
def test_is_quote(words):
    """Placeholder: this test performs no checks."""
def test_is_bracket(words):
    """Placeholder: this test performs no checks."""
def test_is_left_bracket(words):
    """Placeholder: this test performs no checks."""
def test_is_right_bracket(words):
    """Placeholder: this test performs no checks."""

View File

@ -1,16 +0,0 @@
from __future__ import unicode_literals
from spacy.orth import is_punct
def test_comma():
    """A lone comma is expected to count as punctuation."""
    verdict = is_punct(',')
    assert verdict
def test_space():
    """A single space is expected not to count as punctuation."""
    verdict = is_punct(' ')
    assert not verdict
def test_letter():
    """A single letter is expected not to count as punctuation."""
    verdict = is_punct('a')
    assert not verdict

View File

@ -1,35 +0,0 @@
from __future__ import unicode_literals
from spacy.orth import like_number
def test_digits():
    """Plain digit strings are expected to look like numbers."""
    for numeral in ('10', '1'):
        assert like_number(numeral)
def test_comma():
    """Digit strings containing a comma are expected to look like numbers."""
    for numeral in ('10,000', '10,00', ',10'):
        assert like_number(numeral)
def test_period():
    """Digit strings containing a period are expected to look like numbers."""
    for numeral in ('999.0', '.99'):
        assert like_number(numeral)
def test_fraction():
    """A one-slash fraction looks like a number; a two-slash one does not."""
    simple = like_number('1/2')
    assert simple
    chained = like_number('1/2/3')
    assert not chained
def test_word():
    """Spelled-out number words are expected to look like numbers."""
    for number_word in ('one', 'two', 'billion'):
        assert like_number(number_word)
def test_not_number():
    """A non-number word and a lone comma are expected not to match."""
    for sample in ('dog', ','):
        assert not like_number(sample)

View File

@ -1,33 +0,0 @@
from __future__ import unicode_literals
import pytest
from spacy.orth import word_shape as ws
def test_capitalized():
    """'Nasa' is expected to have the shape 'Xxxx'."""
    shape = ws('Nasa')
    assert shape == 'Xxxx'
def test_truncate():
    """An eleven-letter lowercase word is expected to give shape 'xxxx'."""
    shape = ws('capitalized')
    assert shape == 'xxxx'
def test_digits():
    """A nine-digit string is expected to give the shape 'dddd'."""
    shape = ws('999999999')
    assert shape == 'dddd'
def test_mix():
    """Alternating capitals and digits are expected to give 'XdXd'."""
    shape = ws('C3P0')
    assert shape == 'XdXd'
def test_punct():
    """A lone comma is expected to be its own shape."""
    shape = ws(',')
    assert shape == ','
def test_space():
    """A newline character is expected to be its own shape."""
    shape = ws('\n')
    assert shape == '\n'
def test_punct_seq():
    """A run of mixed punctuation is expected to come back unchanged."""
    shape = ws('``,-')
    assert shape == '``,-'

View File

@ -1,23 +0,0 @@
from __future__ import unicode_literals
from spacy.orth import like_url
def test_basic_url():
    """Domain-like strings are expected to look like URLs."""
    candidates = (
        'www.google.com',
        'google.com',
        'sydney.com',
        'Sydney.edu',
        '2girls1cup.org',
    )
    for candidate in candidates:
        assert like_url(candidate)
def test_close_enough():
    """Strings with only a URL-like prefix are still expected to match."""
    for candidate in ('http://stupid', 'www.hi'):
        assert like_url(candidate)
def test_non_match():
    """A plain word and dotted non-domain strings are expected not to match."""
    for candidate in ('dog', '1.2', '1.a', 'hello.There'):
        assert not like_url(candidate)