* Update tests for new meaning of 'string'

This commit is contained in:
Matthew Honnibal 2015-01-24 07:22:30 +11:00
parent fda94271af
commit 706305ee26
8 changed files with 60 additions and 60 deletions

View File

@@ -18,24 +18,24 @@ def test_possess(EN):
def test_apostrophe(EN): def test_apostrophe(EN):
tokens = EN("schools'") tokens = EN("schools'")
assert len(tokens) == 2 assert len(tokens) == 2
assert tokens[1].string == "'" assert tokens[1].orth_ == "'"
assert tokens[0].string == "schools" assert tokens[0].orth_ == "schools"
def test_LL(EN): def test_LL(EN):
tokens = EN("we'll") tokens = EN("we'll")
assert len(tokens) == 2 assert len(tokens) == 2
assert tokens[1].string == "'ll" assert tokens[1].orth_ == "'ll"
assert tokens[1].lemma_ == "will" assert tokens[1].lemma_ == "will"
assert tokens[0].string == "we" assert tokens[0].orth_ == "we"
def test_aint(EN): def test_aint(EN):
tokens = EN("ain't") tokens = EN("ain't")
assert len(tokens) == 2 assert len(tokens) == 2
assert tokens[0].string == "ai" assert tokens[0].orth_ == "ai"
assert tokens[0].lemma_ == "be" assert tokens[0].lemma_ == "be"
assert tokens[1].string == "n't" assert tokens[1].orth_ == "n't"
assert tokens[1].lemma_ == "not" assert tokens[1].lemma_ == "not"
@@ -46,7 +46,7 @@ def test_capitalized(EN):
assert len(tokens) == 2 assert len(tokens) == 2
tokens = EN("Ain't") tokens = EN("Ain't")
assert len(tokens) == 2 assert len(tokens) == 2
assert tokens[0].string == "Ai" assert tokens[0].orth_ == "Ai"
assert tokens[0].lemma_ == "be" assert tokens[0].lemma_ == "be"

View File

@@ -11,28 +11,28 @@ def EN():
def test_tweebo_challenge(EN): def test_tweebo_challenge(EN):
text = u""":o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| ") :> ....""" text = u""":o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| ") :> ...."""
tokens = EN(text) tokens = EN(text)
assert tokens[0].string == ":o" assert tokens[0].orth_ == ":o"
assert tokens[1].string == ":/" assert tokens[1].orth_ == ":/"
assert tokens[2].string == ":'(" assert tokens[2].orth_ == ":'("
assert tokens[3].string == ">:o" assert tokens[3].orth_ == ">:o"
assert tokens[4].string == "(:" assert tokens[4].orth_ == "(:"
assert tokens[5].string == ":)" assert tokens[5].orth_ == ":)"
assert tokens[6].string == ">.<" assert tokens[6].orth_ == ">.<"
assert tokens[7].string == "XD" assert tokens[7].orth_ == "XD"
assert tokens[8].string == "-__-" assert tokens[8].orth_ == "-__-"
assert tokens[9].string == "o.O" assert tokens[9].orth_ == "o.O"
assert tokens[10].string == ";D" assert tokens[10].orth_ == ";D"
assert tokens[11].string == ":-)" assert tokens[11].orth_ == ":-)"
assert tokens[12].string == "@_@" assert tokens[12].orth_ == "@_@"
assert tokens[13].string == ":P" assert tokens[13].orth_ == ":P"
assert tokens[14].string == "8D" assert tokens[14].orth_ == "8D"
assert tokens[15].string == ":1" assert tokens[15].orth_ == ":1"
assert tokens[16].string == ">:(" assert tokens[16].orth_ == ">:("
assert tokens[17].string == ":D" assert tokens[17].orth_ == ":D"
assert tokens[18].string == "=|" assert tokens[18].orth_ == "=|"
assert tokens[19].string == '")' assert tokens[19].orth_ == '")'
assert tokens[20].string == ':>' assert tokens[20].orth_ == ':>'
assert tokens[21].string == '....' assert tokens[21].orth_ == '....'
def test_false_positive(EN): def test_false_positive(EN):

View File

@@ -21,8 +21,8 @@ def test_open(open_puncts, EN):
string = p + word_str string = p + word_str
tokens = EN(string) tokens = EN(string)
assert len(tokens) == 2 assert len(tokens) == 2
assert tokens[0].string == p assert tokens[0].orth_ == p
assert tokens[1].string == word_str assert tokens[1].orth_ == word_str
def test_two_different_open(open_puncts, EN): def test_two_different_open(open_puncts, EN):
@@ -31,9 +31,9 @@ def test_two_different_open(open_puncts, EN):
string = p + "`" + word_str string = p + "`" + word_str
tokens = EN(string) tokens = EN(string)
assert len(tokens) == 3 assert len(tokens) == 3
assert tokens[0].string == p assert tokens[0].orth_ == p
assert tokens[1].string == "`" assert tokens[1].orth_ == "`"
assert tokens[2].string == word_str assert tokens[2].orth_ == word_str
def test_three_same_open(open_puncts, EN): def test_three_same_open(open_puncts, EN):
@@ -42,12 +42,12 @@ def test_three_same_open(open_puncts, EN):
string = p + p + p + word_str string = p + p + p + word_str
tokens = EN(string) tokens = EN(string)
assert len(tokens) == 4 assert len(tokens) == 4
assert tokens[0].string == p assert tokens[0].orth_ == p
assert tokens[3].string == word_str assert tokens[3].orth_ == word_str
def test_open_appostrophe(EN): def test_open_appostrophe(EN):
string = "'The" string = "'The"
tokens = EN(string) tokens = EN(string)
assert len(tokens) == 2 assert len(tokens) == 2
assert tokens[0].string == "'" assert tokens[0].orth_ == "'"

View File

@@ -13,9 +13,9 @@ def EN():
def test_one(EN): def test_one(EN):
tokens = EN('Betty Botter bought a pound of butter.') tokens = EN('Betty Botter bought a pound of butter.')
assert tokens[0].string == 'Betty' assert tokens[0].orth_ == 'Betty'
tokens2 = EN('Betty also bought a pound of butter.') tokens2 = EN('Betty also bought a pound of butter.')
assert tokens2[0].string == 'Betty' assert tokens2[0].orth_ == 'Betty'

View File

@@ -21,9 +21,9 @@ def test_token(paired_puncts, EN):
string = open_ + word_str + close_ string = open_ + word_str + close_
tokens = EN(string) tokens = EN(string)
assert len(tokens) == 3 assert len(tokens) == 3
assert tokens[0].string == open_ assert tokens[0].orth_ == open_
assert tokens[1].string == word_str assert tokens[1].orth_ == word_str
assert tokens[2].string == close_ assert tokens[2].orth_ == close_
def test_two_different(paired_puncts, EN): def test_two_different(paired_puncts, EN):
@@ -32,9 +32,9 @@ def test_two_different(paired_puncts, EN):
string = "`" + open_ + word_str + close_ + "'" string = "`" + open_ + word_str + close_ + "'"
tokens = EN(string) tokens = EN(string)
assert len(tokens) == 5 assert len(tokens) == 5
assert tokens[0].string == "`" assert tokens[0].orth_ == "`"
assert tokens[1].string == open_ assert tokens[1].orth_ == open_
assert tokens[2].string == word_str assert tokens[2].orth_ == word_str
assert tokens[2].string == word_str assert tokens[2].orth_ == word_str
assert tokens[3].string == close_ assert tokens[3].orth_ == close_
assert tokens[4].string == "'" assert tokens[4].orth_ == "'"

View File

@@ -12,22 +12,22 @@ def EN():
def test_single_word(EN): def test_single_word(EN):
tokens = EN(u'hello') tokens = EN(u'hello')
assert tokens[0].string == 'hello' assert tokens[0].orth_ == 'hello'
def test_two_words(EN): def test_two_words(EN):
tokens = EN('hello possums') tokens = EN('hello possums')
assert len(tokens) == 2 assert len(tokens) == 2
assert tokens[0].string != tokens[1].string assert tokens[0].orth_ != tokens[1].orth_
def test_punct(EN): def test_punct(EN):
tokens = EN('hello, possums.') tokens = EN('hello, possums.')
assert len(tokens) == 4 assert len(tokens) == 4
assert tokens[0].string == 'hello' assert tokens[0].orth_ == 'hello'
assert tokens[1].string == ',' assert tokens[1].orth_ == ','
assert tokens[2].string == 'possums' assert tokens[2].orth_ == 'possums'
assert tokens[1].string != 'hello' assert tokens[1].orth_ != 'hello'
def test_digits(EN): def test_digits(EN):
@@ -87,14 +87,14 @@ def test_cnts2(EN):
def test_cnts3(EN): def test_cnts3(EN):
text = u"“Isn't it?”" text = u"“Isn't it?”"
tokens = EN(text) tokens = EN(text)
words = [t.string for t in tokens] words = [t.orth_ for t in tokens]
assert len(words) == 6 assert len(words) == 6
def test_cnts4(EN): def test_cnts4(EN):
text = u"""Yes! "I'd rather have a walk", Ms. Comble sighed. """ text = u"""Yes! "I'd rather have a walk", Ms. Comble sighed. """
tokens = EN(text) tokens = EN(text)
words = [t.string for t in tokens] words = [t.orth_ for t in tokens]
assert len(words) == 15 assert len(words) == 15
@@ -107,13 +107,13 @@ def test_cnts5(EN):
def test_cnts6(EN): def test_cnts6(EN):
text = u'They ran about 10km.' text = u'They ran about 10km.'
tokens = EN(text) tokens = EN(text)
words = [t.string for t in tokens] words = [t.orth_ for t in tokens]
assert len(words) == 6 assert len(words) == 6
def test_bracket_period(EN): def test_bracket_period(EN):
text = u'(And a 6a.m. run through Washington Park).' text = u'(And a 6a.m. run through Washington Park).'
tokens = EN(text) tokens = EN(text)
assert tokens[len(tokens) - 1].string == u'.' assert tokens[len(tokens) - 1].orth_ == u'.'
#def test_cnts7(): #def test_cnts7():
# text = 'But then the 6,000-year ice age came...' # text = 'But then the 6,000-year ice age came...'

View File

@@ -13,4 +13,4 @@ def test1(EN):
words = ['JAPAN', 'GET', 'LUCKY'] words = ['JAPAN', 'GET', 'LUCKY']
tokens = EN.tokenizer.tokens_from_list(words) tokens = EN.tokenizer.tokens_from_list(words)
assert len(tokens) == 3 assert len(tokens) == 3
assert tokens[0].string == 'JAPAN' assert tokens[0].orth_ == 'JAPAN'

View File

@@ -18,7 +18,7 @@ def test_single_space(EN):
def test_double_space(EN): def test_double_space(EN):
tokens = EN('hello  possums') tokens = EN('hello  possums')
assert len(tokens) == 3 assert len(tokens) == 3
assert tokens[1].string == ' ' assert tokens[1].orth_ == ' '
def test_newline(EN): def test_newline(EN):