mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
* Upd tests for new meaning of 'string'
This commit is contained in:
parent
fda94271af
commit
706305ee26
|
@ -18,24 +18,24 @@ def test_possess(EN):
|
||||||
def test_apostrophe(EN):
|
def test_apostrophe(EN):
|
||||||
tokens = EN("schools'")
|
tokens = EN("schools'")
|
||||||
assert len(tokens) == 2
|
assert len(tokens) == 2
|
||||||
assert tokens[1].string == "'"
|
assert tokens[1].orth_ == "'"
|
||||||
assert tokens[0].string == "schools"
|
assert tokens[0].orth_ == "schools"
|
||||||
|
|
||||||
|
|
||||||
def test_LL(EN):
|
def test_LL(EN):
|
||||||
tokens = EN("we'll")
|
tokens = EN("we'll")
|
||||||
assert len(tokens) == 2
|
assert len(tokens) == 2
|
||||||
assert tokens[1].string == "'ll"
|
assert tokens[1].orth_ == "'ll"
|
||||||
assert tokens[1].lemma_ == "will"
|
assert tokens[1].lemma_ == "will"
|
||||||
assert tokens[0].string == "we"
|
assert tokens[0].orth_ == "we"
|
||||||
|
|
||||||
|
|
||||||
def test_aint(EN):
|
def test_aint(EN):
|
||||||
tokens = EN("ain't")
|
tokens = EN("ain't")
|
||||||
assert len(tokens) == 2
|
assert len(tokens) == 2
|
||||||
assert tokens[0].string == "ai"
|
assert tokens[0].orth_ == "ai"
|
||||||
assert tokens[0].lemma_ == "be"
|
assert tokens[0].lemma_ == "be"
|
||||||
assert tokens[1].string == "n't"
|
assert tokens[1].orth_ == "n't"
|
||||||
assert tokens[1].lemma_ == "not"
|
assert tokens[1].lemma_ == "not"
|
||||||
|
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@ def test_capitalized(EN):
|
||||||
assert len(tokens) == 2
|
assert len(tokens) == 2
|
||||||
tokens = EN("Ain't")
|
tokens = EN("Ain't")
|
||||||
assert len(tokens) == 2
|
assert len(tokens) == 2
|
||||||
assert tokens[0].string == "Ai"
|
assert tokens[0].orth_ == "Ai"
|
||||||
assert tokens[0].lemma_ == "be"
|
assert tokens[0].lemma_ == "be"
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -11,28 +11,28 @@ def EN():
|
||||||
def test_tweebo_challenge(EN):
|
def test_tweebo_challenge(EN):
|
||||||
text = u""":o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| ") :> ...."""
|
text = u""":o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| ") :> ...."""
|
||||||
tokens = EN(text)
|
tokens = EN(text)
|
||||||
assert tokens[0].string == ":o"
|
assert tokens[0].orth_ == ":o"
|
||||||
assert tokens[1].string == ":/"
|
assert tokens[1].orth_ == ":/"
|
||||||
assert tokens[2].string == ":'("
|
assert tokens[2].orth_ == ":'("
|
||||||
assert tokens[3].string == ">:o"
|
assert tokens[3].orth_ == ">:o"
|
||||||
assert tokens[4].string == "(:"
|
assert tokens[4].orth_ == "(:"
|
||||||
assert tokens[5].string == ":)"
|
assert tokens[5].orth_ == ":)"
|
||||||
assert tokens[6].string == ">.<"
|
assert tokens[6].orth_ == ">.<"
|
||||||
assert tokens[7].string == "XD"
|
assert tokens[7].orth_ == "XD"
|
||||||
assert tokens[8].string == "-__-"
|
assert tokens[8].orth_ == "-__-"
|
||||||
assert tokens[9].string == "o.O"
|
assert tokens[9].orth_ == "o.O"
|
||||||
assert tokens[10].string == ";D"
|
assert tokens[10].orth_ == ";D"
|
||||||
assert tokens[11].string == ":-)"
|
assert tokens[11].orth_ == ":-)"
|
||||||
assert tokens[12].string == "@_@"
|
assert tokens[12].orth_ == "@_@"
|
||||||
assert tokens[13].string == ":P"
|
assert tokens[13].orth_ == ":P"
|
||||||
assert tokens[14].string == "8D"
|
assert tokens[14].orth_ == "8D"
|
||||||
assert tokens[15].string == ":1"
|
assert tokens[15].orth_ == ":1"
|
||||||
assert tokens[16].string == ">:("
|
assert tokens[16].orth_ == ">:("
|
||||||
assert tokens[17].string == ":D"
|
assert tokens[17].orth_ == ":D"
|
||||||
assert tokens[18].string == "=|"
|
assert tokens[18].orth_ == "=|"
|
||||||
assert tokens[19].string == '")'
|
assert tokens[19].orth_ == '")'
|
||||||
assert tokens[20].string == ':>'
|
assert tokens[20].orth_ == ':>'
|
||||||
assert tokens[21].string == '....'
|
assert tokens[21].orth_ == '....'
|
||||||
|
|
||||||
|
|
||||||
def test_false_positive(EN):
|
def test_false_positive(EN):
|
||||||
|
|
|
@ -21,8 +21,8 @@ def test_open(open_puncts, EN):
|
||||||
string = p + word_str
|
string = p + word_str
|
||||||
tokens = EN(string)
|
tokens = EN(string)
|
||||||
assert len(tokens) == 2
|
assert len(tokens) == 2
|
||||||
assert tokens[0].string == p
|
assert tokens[0].orth_ == p
|
||||||
assert tokens[1].string == word_str
|
assert tokens[1].orth_ == word_str
|
||||||
|
|
||||||
|
|
||||||
def test_two_different_open(open_puncts, EN):
|
def test_two_different_open(open_puncts, EN):
|
||||||
|
@ -31,9 +31,9 @@ def test_two_different_open(open_puncts, EN):
|
||||||
string = p + "`" + word_str
|
string = p + "`" + word_str
|
||||||
tokens = EN(string)
|
tokens = EN(string)
|
||||||
assert len(tokens) == 3
|
assert len(tokens) == 3
|
||||||
assert tokens[0].string == p
|
assert tokens[0].orth_ == p
|
||||||
assert tokens[1].string == "`"
|
assert tokens[1].orth_ == "`"
|
||||||
assert tokens[2].string == word_str
|
assert tokens[2].orth_ == word_str
|
||||||
|
|
||||||
|
|
||||||
def test_three_same_open(open_puncts, EN):
|
def test_three_same_open(open_puncts, EN):
|
||||||
|
@ -42,12 +42,12 @@ def test_three_same_open(open_puncts, EN):
|
||||||
string = p + p + p + word_str
|
string = p + p + p + word_str
|
||||||
tokens = EN(string)
|
tokens = EN(string)
|
||||||
assert len(tokens) == 4
|
assert len(tokens) == 4
|
||||||
assert tokens[0].string == p
|
assert tokens[0].orth_ == p
|
||||||
assert tokens[3].string == word_str
|
assert tokens[3].orth_ == word_str
|
||||||
|
|
||||||
|
|
||||||
def test_open_appostrophe(EN):
|
def test_open_appostrophe(EN):
|
||||||
string = "'The"
|
string = "'The"
|
||||||
tokens = EN(string)
|
tokens = EN(string)
|
||||||
assert len(tokens) == 2
|
assert len(tokens) == 2
|
||||||
assert tokens[0].string == "'"
|
assert tokens[0].orth_ == "'"
|
||||||
|
|
|
@ -13,9 +13,9 @@ def EN():
|
||||||
|
|
||||||
def test_one(EN):
|
def test_one(EN):
|
||||||
tokens = EN('Betty Botter bought a pound of butter.')
|
tokens = EN('Betty Botter bought a pound of butter.')
|
||||||
assert tokens[0].string == 'Betty'
|
assert tokens[0].orth_ == 'Betty'
|
||||||
tokens2 = EN('Betty also bought a pound of butter.')
|
tokens2 = EN('Betty also bought a pound of butter.')
|
||||||
assert tokens2[0].string == 'Betty'
|
assert tokens2[0].orth_ == 'Betty'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -21,9 +21,9 @@ def test_token(paired_puncts, EN):
|
||||||
string = open_ + word_str + close_
|
string = open_ + word_str + close_
|
||||||
tokens = EN(string)
|
tokens = EN(string)
|
||||||
assert len(tokens) == 3
|
assert len(tokens) == 3
|
||||||
assert tokens[0].string == open_
|
assert tokens[0].orth_ == open_
|
||||||
assert tokens[1].string == word_str
|
assert tokens[1].orth_ == word_str
|
||||||
assert tokens[2].string == close_
|
assert tokens[2].orth_ == close_
|
||||||
|
|
||||||
|
|
||||||
def test_two_different(paired_puncts, EN):
|
def test_two_different(paired_puncts, EN):
|
||||||
|
@ -32,9 +32,9 @@ def test_two_different(paired_puncts, EN):
|
||||||
string = "`" + open_ + word_str + close_ + "'"
|
string = "`" + open_ + word_str + close_ + "'"
|
||||||
tokens = EN(string)
|
tokens = EN(string)
|
||||||
assert len(tokens) == 5
|
assert len(tokens) == 5
|
||||||
assert tokens[0].string == "`"
|
assert tokens[0].orth_ == "`"
|
||||||
assert tokens[1].string == open_
|
assert tokens[1].orth_ == open_
|
||||||
assert tokens[2].string == word_str
|
assert tokens[2].orth_ == word_str
|
||||||
assert tokens[2].string == word_str
|
assert tokens[2].orth_ == word_str
|
||||||
assert tokens[3].string == close_
|
assert tokens[3].orth_ == close_
|
||||||
assert tokens[4].string == "'"
|
assert tokens[4].orth_ == "'"
|
||||||
|
|
|
@ -12,22 +12,22 @@ def EN():
|
||||||
|
|
||||||
def test_single_word(EN):
|
def test_single_word(EN):
|
||||||
tokens = EN(u'hello')
|
tokens = EN(u'hello')
|
||||||
assert tokens[0].string == 'hello'
|
assert tokens[0].orth_ == 'hello'
|
||||||
|
|
||||||
|
|
||||||
def test_two_words(EN):
|
def test_two_words(EN):
|
||||||
tokens = EN('hello possums')
|
tokens = EN('hello possums')
|
||||||
assert len(tokens) == 2
|
assert len(tokens) == 2
|
||||||
assert tokens[0].string != tokens[1].string
|
assert tokens[0].orth_ != tokens[1].orth_
|
||||||
|
|
||||||
|
|
||||||
def test_punct(EN):
|
def test_punct(EN):
|
||||||
tokens = EN('hello, possums.')
|
tokens = EN('hello, possums.')
|
||||||
assert len(tokens) == 4
|
assert len(tokens) == 4
|
||||||
assert tokens[0].string == 'hello'
|
assert tokens[0].orth_ == 'hello'
|
||||||
assert tokens[1].string == ','
|
assert tokens[1].orth_ == ','
|
||||||
assert tokens[2].string == 'possums'
|
assert tokens[2].orth_ == 'possums'
|
||||||
assert tokens[1].string != 'hello'
|
assert tokens[1].orth_ != 'hello'
|
||||||
|
|
||||||
|
|
||||||
def test_digits(EN):
|
def test_digits(EN):
|
||||||
|
@ -87,14 +87,14 @@ def test_cnts2(EN):
|
||||||
def test_cnts3(EN):
|
def test_cnts3(EN):
|
||||||
text = u"“Isn't it?”"
|
text = u"“Isn't it?”"
|
||||||
tokens = EN(text)
|
tokens = EN(text)
|
||||||
words = [t.string for t in tokens]
|
words = [t.orth_ for t in tokens]
|
||||||
assert len(words) == 6
|
assert len(words) == 6
|
||||||
|
|
||||||
|
|
||||||
def test_cnts4(EN):
|
def test_cnts4(EN):
|
||||||
text = u"""Yes! "I'd rather have a walk", Ms. Comble sighed. """
|
text = u"""Yes! "I'd rather have a walk", Ms. Comble sighed. """
|
||||||
tokens = EN(text)
|
tokens = EN(text)
|
||||||
words = [t.string for t in tokens]
|
words = [t.orth_ for t in tokens]
|
||||||
assert len(words) == 15
|
assert len(words) == 15
|
||||||
|
|
||||||
|
|
||||||
|
@ -107,13 +107,13 @@ def test_cnts5(EN):
|
||||||
def test_cnts6(EN):
|
def test_cnts6(EN):
|
||||||
text = u'They ran about 10km.'
|
text = u'They ran about 10km.'
|
||||||
tokens = EN(text)
|
tokens = EN(text)
|
||||||
words = [t.string for t in tokens]
|
words = [t.orth_ for t in tokens]
|
||||||
assert len(words) == 6
|
assert len(words) == 6
|
||||||
|
|
||||||
def test_bracket_period(EN):
|
def test_bracket_period(EN):
|
||||||
text = u'(And a 6a.m. run through Washington Park).'
|
text = u'(And a 6a.m. run through Washington Park).'
|
||||||
tokens = EN(text)
|
tokens = EN(text)
|
||||||
assert tokens[len(tokens) - 1].string == u'.'
|
assert tokens[len(tokens) - 1].orth_ == u'.'
|
||||||
|
|
||||||
#def test_cnts7():
|
#def test_cnts7():
|
||||||
# text = 'But then the 6,000-year ice age came...'
|
# text = 'But then the 6,000-year ice age came...'
|
||||||
|
|
|
@ -13,4 +13,4 @@ def test1(EN):
|
||||||
words = ['JAPAN', 'GET', 'LUCKY']
|
words = ['JAPAN', 'GET', 'LUCKY']
|
||||||
tokens = EN.tokenizer.tokens_from_list(words)
|
tokens = EN.tokenizer.tokens_from_list(words)
|
||||||
assert len(tokens) == 3
|
assert len(tokens) == 3
|
||||||
assert tokens[0].string == 'JAPAN'
|
assert tokens[0].orth_ == 'JAPAN'
|
||||||
|
|
|
@ -18,7 +18,7 @@ def test_single_space(EN):
|
||||||
def test_double_space(EN):
|
def test_double_space(EN):
|
||||||
tokens = EN('hello  possums')
|
tokens = EN('hello  possums')
|
||||||
assert len(tokens) == 3
|
assert len(tokens) == 3
|
||||||
assert tokens[1].string == ' '
|
assert tokens[1].orth_ == ' '
|
||||||
|
|
||||||
|
|
||||||
def test_newline(EN):
|
def test_newline(EN):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user