2015-02-09 02:30:46 +03:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2014-12-21 12:58:21 +03:00
|
|
|
from spacy.en import English
|
2014-12-19 19:51:25 +03:00
|
|
|
import pytest
|
|
|
|
|
2014-12-23 03:40:32 +03:00
|
|
|
|
2014-12-21 12:58:21 +03:00
|
|
|
@pytest.fixture(scope="module")
def EN():
    """Return an ``English`` pipeline shared by all tests in this module.

    With the default function scope, ``English()`` would be constructed
    again for every test that requests this fixture (directly or through
    ``tagged``/``lemmas``). Module scope constructs it once and reuses it.

    NOTE(review): assumes no test mutates the pipeline in a way that
    affects later tests -- confirm if more tests are added to this file.
    """
    return English()
|
2014-12-19 19:51:25 +03:00
|
|
|
|
|
|
|
@pytest.fixture
def tagged(EN):
    """Return the tokens of a fixed sentence, processed with ``tag=True``
    and ``parse=False``.
    """
    sentence = u'Bananas in pyjamas are geese.'
    return EN(sentence, tag=True, parse=False)
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def lemmas(tagged):
    """Return a list holding the ``lemma_`` of every token in ``tagged``,
    in token order.
    """
    collected = []
    for token in tagged:
        collected.append(token.lemma_)
    return collected
|
2014-12-19 19:51:25 +03:00
|
|
|
|
|
|
|
|
2015-01-03 15:41:16 +03:00
|
|
|
def test_lemmas(lemmas, tagged):
    """Check the lemmas produced for 'Bananas in pyjamas are geese.'."""
    # The first four lemmas are asserted unconditionally, in order.
    expected_leading = ['banana', 'in', 'pyjama', 'be']
    for position, expected in enumerate(expected_leading):
        assert lemmas[position] == expected

    # The fifth lemma ('geese') is only checked when that token received the
    # same tag as the third token ('pyjamas').
    third_tag = tagged[2].tag
    if third_tag == tagged[4].tag:
        assert lemmas[4] == 'goose'
|
2015-02-09 02:30:46 +03:00
|
|
|
|
|
|
|
|
|
|
|
def test_didnt(EN):
    """The second token of "I didn't do it" must have a non-empty lemma."""
    doc = EN(u"I didn't do it")
    second_token = doc[1]
    assert second_token.lemma_ != u""
|