diff --git a/tests/test_contractions.py b/tests/test_contractions.py index d3ebd6e84..4436fe86d 100644 --- a/tests/test_contractions.py +++ b/tests/test_contractions.py @@ -18,24 +18,24 @@ def test_possess(EN): def test_apostrophe(EN): tokens = EN("schools'") assert len(tokens) == 2 - assert tokens[1].string == "'" - assert tokens[0].string == "schools" + assert tokens[1].orth_ == "'" + assert tokens[0].orth_ == "schools" def test_LL(EN): tokens = EN("we'll") assert len(tokens) == 2 - assert tokens[1].string == "'ll" + assert tokens[1].orth_ == "'ll" assert tokens[1].lemma_ == "will" - assert tokens[0].string == "we" + assert tokens[0].orth_ == "we" def test_aint(EN): tokens = EN("ain't") assert len(tokens) == 2 - assert tokens[0].string == "ai" + assert tokens[0].orth_ == "ai" assert tokens[0].lemma_ == "be" - assert tokens[1].string == "n't" + assert tokens[1].orth_ == "n't" assert tokens[1].lemma_ == "not" @@ -46,7 +46,7 @@ def test_capitalized(EN): assert len(tokens) == 2 tokens = EN("Ain't") assert len(tokens) == 2 - assert tokens[0].string == "Ai" + assert tokens[0].orth_ == "Ai" assert tokens[0].lemma_ == "be" diff --git a/tests/test_emoticons.py b/tests/test_emoticons.py index 2d27fef61..2725845a6 100644 --- a/tests/test_emoticons.py +++ b/tests/test_emoticons.py @@ -11,28 +11,28 @@ def EN(): def test_tweebo_challenge(EN): text = u""":o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| ") :> ....""" tokens = EN(text) - assert tokens[0].string == ":o" - assert tokens[1].string == ":/" - assert tokens[2].string == ":'(" - assert tokens[3].string == ">:o" - assert tokens[4].string == "(:" - assert tokens[5].string == ":)" - assert tokens[6].string == ">.<" - assert tokens[7].string == "XD" - assert tokens[8].string == "-__-" - assert tokens[9].string == "o.O" - assert tokens[10].string == ";D" - assert tokens[11].string == ":-)" - assert tokens[12].string == "@_@" - assert tokens[13].string == ":P" - assert tokens[14].string == "8D" - assert tokens[15].string == ":1" - assert tokens[16].string == ">:(" - assert tokens[17].string == ":D" - assert tokens[18].string == "=|" - assert tokens[19].string == '")' - assert tokens[20].string == ':>' - assert tokens[21].string == '....' + assert tokens[0].orth_ == ":o" + assert tokens[1].orth_ == ":/" + assert tokens[2].orth_ == ":'(" + assert tokens[3].orth_ == ">:o" + assert tokens[4].orth_ == "(:" + assert tokens[5].orth_ == ":)" + assert tokens[6].orth_ == ">.<" + assert tokens[7].orth_ == "XD" + assert tokens[8].orth_ == "-__-" + assert tokens[9].orth_ == "o.O" + assert tokens[10].orth_ == ";D" + assert tokens[11].orth_ == ":-)" + assert tokens[12].orth_ == "@_@" + assert tokens[13].orth_ == ":P" + assert tokens[14].orth_ == "8D" + assert tokens[15].orth_ == ":1" + assert tokens[16].orth_ == ">:(" + assert tokens[17].orth_ == ":D" + assert tokens[18].orth_ == "=|" + assert tokens[19].orth_ == '")' + assert tokens[20].orth_ == ':>' + assert tokens[21].orth_ == '....' def test_false_positive(EN): diff --git a/tests/test_pre_punct.py b/tests/test_pre_punct.py index 2eb6fdd3d..f932c400e 100644 --- a/tests/test_pre_punct.py +++ b/tests/test_pre_punct.py @@ -21,8 +21,8 @@ def test_open(open_puncts, EN): string = p + word_str tokens = EN(string) assert len(tokens) == 2 - assert tokens[0].string == p - assert tokens[1].string == word_str + assert tokens[0].orth_ == p + assert tokens[1].orth_ == word_str def test_two_different_open(open_puncts, EN): @@ -31,9 +31,9 @@ def test_two_different_open(open_puncts, EN): string = p + "`" + word_str tokens = EN(string) assert len(tokens) == 3 - assert tokens[0].string == p - assert tokens[1].string == "`" - assert tokens[2].string == word_str + assert tokens[0].orth_ == p + assert tokens[1].orth_ == "`" + assert tokens[2].orth_ == word_str def test_three_same_open(open_puncts, EN): @@ -42,12 +42,12 @@ def test_three_same_open(open_puncts, EN): string = p + p + p + word_str tokens = EN(string) assert len(tokens) == 4 - assert tokens[0].string == p - assert tokens[3].string == word_str + assert tokens[0].orth_ == p + assert tokens[3].orth_ == word_str def test_open_appostrophe(EN): string = "'The" tokens = EN(string) assert len(tokens) == 2 - assert tokens[0].string == "'" + assert tokens[0].orth_ == "'" diff --git a/tests/test_string_loading.py b/tests/test_string_loading.py index a5021856f..86cd4f2a9 100644 --- a/tests/test_string_loading.py +++ b/tests/test_string_loading.py @@ -13,9 +13,9 @@ def EN(): def test_one(EN): tokens = EN('Betty Botter bought a pound of butter.') - assert tokens[0].string == 'Betty' + assert tokens[0].orth_ == 'Betty' tokens2 = EN('Betty also bought a pound of butter.') - assert tokens2[0].string == 'Betty' + assert tokens2[0].orth_ == 'Betty' diff --git a/tests/test_surround_punct.py b/tests/test_surround_punct.py index 0c816ad8f..65ef0209f 100644 --- a/tests/test_surround_punct.py +++ b/tests/test_surround_punct.py @@ -21,9 +21,9 @@ def test_token(paired_puncts, EN): string = open_ + word_str + close_ tokens = EN(string) assert len(tokens) == 3 - assert tokens[0].string == open_ - assert tokens[1].string == word_str - assert tokens[2].string == close_ + assert tokens[0].orth_ == open_ + assert tokens[1].orth_ == word_str + assert tokens[2].orth_ == close_ def test_two_different(paired_puncts, EN): @@ -32,9 +32,9 @@ def test_two_different(paired_puncts, EN): string = "`" + open_ + word_str + close_ + "'" tokens = EN(string) assert len(tokens) == 5 - assert tokens[0].string == "`" - assert tokens[1].string == open_ - assert tokens[2].string == word_str - assert tokens[2].string == word_str - assert tokens[3].string == close_ - assert tokens[4].string == "'" + assert tokens[0].orth_ == "`" + assert tokens[1].orth_ == open_ + assert tokens[2].orth_ == word_str + assert tokens[2].orth_ == word_str + assert tokens[3].orth_ == close_ + assert tokens[4].orth_ == "'" diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index 4cbc9d4f0..1f195e5e7 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -12,22 +12,22 @@ def EN(): def test_single_word(EN): tokens = EN(u'hello') - assert tokens[0].string == 'hello' + assert tokens[0].orth_ == 'hello' def test_two_words(EN): tokens = EN('hello possums') assert len(tokens) == 2 - assert tokens[0].string != tokens[1].string + assert tokens[0].orth_ != tokens[1].orth_ def test_punct(EN): tokens = EN('hello, possums.') assert len(tokens) == 4 - assert tokens[0].string == 'hello' - assert tokens[1].string == ',' - assert tokens[2].string == 'possums' - assert tokens[1].string != 'hello' + assert tokens[0].orth_ == 'hello' + assert tokens[1].orth_ == ',' + assert tokens[2].orth_ == 'possums' + assert tokens[1].orth_ != 'hello' def test_digits(EN): @@ -87,14 +87,14 @@ def test_cnts2(EN): def test_cnts3(EN): text = u"“Isn't it?”" tokens = EN(text) - words = [t.string for t in tokens] + words = [t.orth_ for t in tokens] assert len(words) == 6 def test_cnts4(EN): text = u"""Yes! "I'd rather have a walk", Ms. Comble sighed. """ tokens = EN(text) - words = [t.string for t in tokens] + words = [t.orth_ for t in tokens] assert len(words) == 15 @@ -107,13 +107,13 @@ def test_cnts5(EN): def test_cnts6(EN): text = u'They ran about 10km.' tokens = EN(text) - words = [t.string for t in tokens] + words = [t.orth_ for t in tokens] assert len(words) == 6 def test_bracket_period(EN): text = u'(And a 6a.m. run through Washington Park).' tokens = EN(text) - assert tokens[len(tokens) - 1].string == u'.' + assert tokens[len(tokens) - 1].orth_ == u'.' #def test_cnts7(): # text = 'But then the 6,000-year ice age came...' diff --git a/tests/test_tokens_from_list.py b/tests/test_tokens_from_list.py index e72c355fa..5bb5d7d69 100644 --- a/tests/test_tokens_from_list.py +++ b/tests/test_tokens_from_list.py @@ -13,4 +13,4 @@ def test1(EN): words = ['JAPAN', 'GET', 'LUCKY'] tokens = EN.tokenizer.tokens_from_list(words) assert len(tokens) == 3 - assert tokens[0].string == 'JAPAN' + assert tokens[0].orth_ == 'JAPAN' diff --git a/tests/test_whitespace.py b/tests/test_whitespace.py index 3fc96272a..a3a700235 100644 --- a/tests/test_whitespace.py +++ b/tests/test_whitespace.py @@ -18,7 +18,7 @@ def test_single_space(EN): def test_double_space(EN): tokens = EN('hello possums') assert len(tokens) == 3 - assert tokens[1].string == ' ' + assert tokens[1].orth_ == ' ' def test_newline(EN):