* Update tests, preventing the parser from being loaded if possible

This commit is contained in:
Matthew Honnibal 2015-05-25 01:02:03 +02:00
parent a9c70c9447
commit 89c3364041
10 changed files with 22 additions and 25 deletions

View File

@@ -11,7 +11,7 @@ def EN():
@pytest.fixture
def tagged(EN):
string = u'Bananas in pyjamas are geese.'
tokens = EN(string, tag=True)
tokens = EN(string, tag=True, parse=False)
return tokens

View File

@@ -11,7 +11,7 @@ EN = English()
def test_attr_of_token():
text = u'An example sentence.'
tokens = EN(text)
tokens = EN(text, tag=True, parse=False)
example = EN.vocab[u'example']
assert example.orth != example.shape
feats_array = tokens.to_array((attrs.ORTH, attrs.SHAPE))

View File

@@ -11,7 +11,7 @@ def orths(tokens):
def test_simple_two():
tokens = NLU('I lost money and pride.')
tokens = NLU('I lost money and pride.', tag=True, parse=False)
pride = tokens[4]
assert orths(pride.conjuncts) == ['money', 'pride']
money = tokens[2]

View File

@@ -3,26 +3,23 @@ import pytest
from spacy.en import English
@pytest.fixture
def EN():
return English()
EN = English()
def test_possess(EN):
tokens = EN("Mike's", parse=False)
def test_possess():
tokens = EN("Mike's", parse=False, tag=False)
assert EN.vocab.strings[tokens[0].orth] == "Mike"
assert EN.vocab.strings[tokens[1].orth] == "'s"
assert len(tokens) == 2
def test_apostrophe(EN):
tokens = EN("schools'")
def test_apostrophe():
tokens = EN("schools'", parse=False, tag=False)
assert len(tokens) == 2
assert tokens[1].orth_ == "'"
assert tokens[0].orth_ == "schools"
def test_LL(EN):
def test_LL():
tokens = EN("we'll", parse=False)
assert len(tokens) == 2
assert tokens[1].orth_ == "'ll"
@@ -30,7 +27,7 @@ def test_LL(EN):
assert tokens[0].orth_ == "we"
def test_aint(EN):
def test_aint():
tokens = EN("ain't", parse=False)
assert len(tokens) == 2
assert tokens[0].orth_ == "ai"
@@ -39,7 +36,7 @@ def test_aint(EN):
assert tokens[1].lemma_ == "not"
def test_capitalized(EN):
def test_capitalized():
tokens = EN("can't", parse=False)
assert len(tokens) == 2
tokens = EN("Can't", parse=False)
@@ -50,7 +47,7 @@ def test_capitalized(EN):
assert tokens[0].lemma_ == "be"
def test_punct(EN):
def test_punct():
tokens = EN("We've", parse=False)
assert len(tokens) == 2
tokens = EN("``We've", parse=False)

View File

@@ -11,7 +11,7 @@ def EN():
def test_tweebo_challenge(EN):
text = u""":o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| ") :> ...."""
tokens = EN(text)
tokens = EN(text, parse=False, tag=False)
assert tokens[0].orth_ == ":o"
assert tokens[1].orth_ == ":/"
assert tokens[2].orth_ == ":'("

View File

@@ -12,7 +12,7 @@ from spacy.en import English
def test_period():
EN = English()
tokens = EN('best.Known')
tokens = EN.tokenizer('best.Known')
assert len(tokens) == 3
tokens = EN('zombo.com')
assert len(tokens) == 1

View File

@@ -20,7 +20,7 @@ def morph_exc():
def test_load_exc(EN, morph_exc):
EN.tagger.load_morph_exceptions(morph_exc)
tokens = EN('I like his style.', tag=True)
tokens = EN('I like his style.', tag=True, parse=False)
his = tokens[2]
assert his.tag_ == 'PRP$'
assert his.lemma_ == '-PRP-'

View File

@@ -19,7 +19,7 @@ def test_close(close_puncts, EN):
word_str = 'Hello'
for p in close_puncts:
string = word_str + p
tokens = EN(string)
tokens = EN(string, parse=False, tag=False)
assert len(tokens) == 2
assert tokens[1].string == p
assert tokens[0].string == word_str
@@ -29,7 +29,7 @@ def test_two_different_close(close_puncts, EN):
word_str = 'Hello'
for p in close_puncts:
string = word_str + p + "'"
tokens = EN(string)
tokens = EN(string, parse=False, tag=False)
assert len(tokens) == 3
assert tokens[0].string == word_str
assert tokens[1].string == p
@@ -40,12 +40,12 @@ def test_three_same_close(close_puncts, EN):
word_str = 'Hello'
for p in close_puncts:
string = word_str + p + p + p
tokens = EN(string)
tokens = EN(string, tag=False, parse=False)
assert len(tokens) == 4
assert tokens[0].string == word_str
assert tokens[1].string == p
def test_double_end_quote(EN):
assert len(EN("Hello''")) == 2
assert len(EN("''")) == 1
assert len(EN("Hello''", tag=False, parse=False)) == 2
assert len(EN("''", tag=False, parse=False)) == 1

View File

@@ -12,7 +12,7 @@ def paired_puncts():
@pytest.fixture
def EN():
return English()
return English().tokenizer
def test_token(paired_puncts, EN):

View File

@@ -7,7 +7,7 @@ import pytest
@pytest.fixture
def EN():
return English()
return English().tokenizer
def test_single_space(EN):