* Update tests for the new meaning of 'string' (assert on 'orth_' instead)

This commit is contained in:
Matthew Honnibal 2015-01-24 07:22:30 +11:00
parent fda94271af
commit 706305ee26
8 changed files with 60 additions and 60 deletions

View File

@ -18,24 +18,24 @@ def test_possess(EN):
def test_apostrophe(EN):
tokens = EN("schools'")
assert len(tokens) == 2
assert tokens[1].string == "'"
assert tokens[0].string == "schools"
assert tokens[1].orth_ == "'"
assert tokens[0].orth_ == "schools"
def test_LL(EN):
tokens = EN("we'll")
assert len(tokens) == 2
assert tokens[1].string == "'ll"
assert tokens[1].orth_ == "'ll"
assert tokens[1].lemma_ == "will"
assert tokens[0].string == "we"
assert tokens[0].orth_ == "we"
def test_aint(EN):
tokens = EN("ain't")
assert len(tokens) == 2
assert tokens[0].string == "ai"
assert tokens[0].orth_ == "ai"
assert tokens[0].lemma_ == "be"
assert tokens[1].string == "n't"
assert tokens[1].orth_ == "n't"
assert tokens[1].lemma_ == "not"
@ -46,7 +46,7 @@ def test_capitalized(EN):
assert len(tokens) == 2
tokens = EN("Ain't")
assert len(tokens) == 2
assert tokens[0].string == "Ai"
assert tokens[0].orth_ == "Ai"
assert tokens[0].lemma_ == "be"

View File

@ -11,28 +11,28 @@ def EN():
def test_tweebo_challenge(EN):
text = u""":o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| ") :> ...."""
tokens = EN(text)
assert tokens[0].string == ":o"
assert tokens[1].string == ":/"
assert tokens[2].string == ":'("
assert tokens[3].string == ">:o"
assert tokens[4].string == "(:"
assert tokens[5].string == ":)"
assert tokens[6].string == ">.<"
assert tokens[7].string == "XD"
assert tokens[8].string == "-__-"
assert tokens[9].string == "o.O"
assert tokens[10].string == ";D"
assert tokens[11].string == ":-)"
assert tokens[12].string == "@_@"
assert tokens[13].string == ":P"
assert tokens[14].string == "8D"
assert tokens[15].string == ":1"
assert tokens[16].string == ">:("
assert tokens[17].string == ":D"
assert tokens[18].string == "=|"
assert tokens[19].string == '")'
assert tokens[20].string == ':>'
assert tokens[21].string == '....'
assert tokens[0].orth_ == ":o"
assert tokens[1].orth_ == ":/"
assert tokens[2].orth_ == ":'("
assert tokens[3].orth_ == ">:o"
assert tokens[4].orth_ == "(:"
assert tokens[5].orth_ == ":)"
assert tokens[6].orth_ == ">.<"
assert tokens[7].orth_ == "XD"
assert tokens[8].orth_ == "-__-"
assert tokens[9].orth_ == "o.O"
assert tokens[10].orth_ == ";D"
assert tokens[11].orth_ == ":-)"
assert tokens[12].orth_ == "@_@"
assert tokens[13].orth_ == ":P"
assert tokens[14].orth_ == "8D"
assert tokens[15].orth_ == ":1"
assert tokens[16].orth_ == ">:("
assert tokens[17].orth_ == ":D"
assert tokens[18].orth_ == "=|"
assert tokens[19].orth_ == '")'
assert tokens[20].orth_ == ':>'
assert tokens[21].orth_ == '....'
def test_false_positive(EN):

View File

@ -21,8 +21,8 @@ def test_open(open_puncts, EN):
string = p + word_str
tokens = EN(string)
assert len(tokens) == 2
assert tokens[0].string == p
assert tokens[1].string == word_str
assert tokens[0].orth_ == p
assert tokens[1].orth_ == word_str
def test_two_different_open(open_puncts, EN):
@ -31,9 +31,9 @@ def test_two_different_open(open_puncts, EN):
string = p + "`" + word_str
tokens = EN(string)
assert len(tokens) == 3
assert tokens[0].string == p
assert tokens[1].string == "`"
assert tokens[2].string == word_str
assert tokens[0].orth_ == p
assert tokens[1].orth_ == "`"
assert tokens[2].orth_ == word_str
def test_three_same_open(open_puncts, EN):
@ -42,12 +42,12 @@ def test_three_same_open(open_puncts, EN):
string = p + p + p + word_str
tokens = EN(string)
assert len(tokens) == 4
assert tokens[0].string == p
assert tokens[3].string == word_str
assert tokens[0].orth_ == p
assert tokens[3].orth_ == word_str
def test_open_appostrophe(EN):
string = "'The"
tokens = EN(string)
assert len(tokens) == 2
assert tokens[0].string == "'"
assert tokens[0].orth_ == "'"

View File

@ -13,9 +13,9 @@ def EN():
def test_one(EN):
tokens = EN('Betty Botter bought a pound of butter.')
assert tokens[0].string == 'Betty'
assert tokens[0].orth_ == 'Betty'
tokens2 = EN('Betty also bought a pound of butter.')
assert tokens2[0].string == 'Betty'
assert tokens2[0].orth_ == 'Betty'

View File

@ -21,9 +21,9 @@ def test_token(paired_puncts, EN):
string = open_ + word_str + close_
tokens = EN(string)
assert len(tokens) == 3
assert tokens[0].string == open_
assert tokens[1].string == word_str
assert tokens[2].string == close_
assert tokens[0].orth_ == open_
assert tokens[1].orth_ == word_str
assert tokens[2].orth_ == close_
def test_two_different(paired_puncts, EN):
@ -32,9 +32,9 @@ def test_two_different(paired_puncts, EN):
string = "`" + open_ + word_str + close_ + "'"
tokens = EN(string)
assert len(tokens) == 5
assert tokens[0].string == "`"
assert tokens[1].string == open_
assert tokens[2].string == word_str
assert tokens[2].string == word_str
assert tokens[3].string == close_
assert tokens[4].string == "'"
assert tokens[0].orth_ == "`"
assert tokens[1].orth_ == open_
assert tokens[2].orth_ == word_str
assert tokens[2].orth_ == word_str
assert tokens[3].orth_ == close_
assert tokens[4].orth_ == "'"

View File

@ -12,22 +12,22 @@ def EN():
def test_single_word(EN):
tokens = EN(u'hello')
assert tokens[0].string == 'hello'
assert tokens[0].orth_ == 'hello'
def test_two_words(EN):
tokens = EN('hello possums')
assert len(tokens) == 2
assert tokens[0].string != tokens[1].string
assert tokens[0].orth_ != tokens[1].orth_
def test_punct(EN):
tokens = EN('hello, possums.')
assert len(tokens) == 4
assert tokens[0].string == 'hello'
assert tokens[1].string == ','
assert tokens[2].string == 'possums'
assert tokens[1].string != 'hello'
assert tokens[0].orth_ == 'hello'
assert tokens[1].orth_ == ','
assert tokens[2].orth_ == 'possums'
assert tokens[1].orth_ != 'hello'
def test_digits(EN):
@ -87,14 +87,14 @@ def test_cnts2(EN):
def test_cnts3(EN):
text = u"“Isn't it?”"
tokens = EN(text)
words = [t.string for t in tokens]
words = [t.orth_ for t in tokens]
assert len(words) == 6
def test_cnts4(EN):
text = u"""Yes! "I'd rather have a walk", Ms. Comble sighed. """
tokens = EN(text)
words = [t.string for t in tokens]
words = [t.orth_ for t in tokens]
assert len(words) == 15
@ -107,13 +107,13 @@ def test_cnts5(EN):
def test_cnts6(EN):
text = u'They ran about 10km.'
tokens = EN(text)
words = [t.string for t in tokens]
words = [t.orth_ for t in tokens]
assert len(words) == 6
def test_bracket_period(EN):
text = u'(And a 6a.m. run through Washington Park).'
tokens = EN(text)
assert tokens[len(tokens) - 1].string == u'.'
assert tokens[len(tokens) - 1].orth_ == u'.'
#def test_cnts7():
# text = 'But then the 6,000-year ice age came...'

View File

@ -13,4 +13,4 @@ def test1(EN):
words = ['JAPAN', 'GET', 'LUCKY']
tokens = EN.tokenizer.tokens_from_list(words)
assert len(tokens) == 3
assert tokens[0].string == 'JAPAN'
assert tokens[0].orth_ == 'JAPAN'

View File

@ -18,7 +18,7 @@ def test_single_space(EN):
def test_double_space(EN):
tokens = EN('hello possums')
assert len(tokens) == 3
assert tokens[1].string == ' '
assert tokens[1].orth_ == ' '
def test_newline(EN):