2014-07-07 06:23:46 +04:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2014-12-23 05:18:59 +03:00
|
|
|
from spacy.en import English
|
2014-07-07 06:23:46 +04:00
|
|
|
from spacy.util import utf8open
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
import os
|
|
|
|
from os import path
|
|
|
|
|
|
|
|
|
|
|
|
# Directory holding this test module; data files are looked up relative to it.
HERE = os.path.dirname(__file__)
|
|
|
|
|
2014-07-07 07:11:04 +04:00
|
|
|
|
2014-07-07 06:23:46 +04:00
|
|
|
@pytest.fixture
def sun_txt():
    """Return the entire contents of ``sun.txt``, found next to this module.

    The file is opened through ``utf8open`` and read inside a ``with``
    block so the handle is closed as soon as the text has been read,
    instead of being left open until garbage collection.
    """
    loc = path.join(HERE, 'sun.txt')
    # NOTE(review): relies on utf8open returning a file object that supports
    # the context-manager protocol -- confirm against spacy.util.
    with utf8open(loc) as file_:
        return file_.read()
|
|
|
|
|
|
|
|
|
|
|
|
def test_tokenize(sun_txt):
    """Smoke test: run the English pipeline over the sample text.

    The pipeline is built with ``tag=False, parse=False``. The test passes
    when the fixture text is non-empty and calling the pipeline on it does
    not raise; the returned tokens are not inspected.
    """
    nlp = English(tag=False, parse=False)
    # An empty fixture file would make the run below meaningless.
    assert sun_txt
    # Called only to surface exceptions; the result is intentionally unused.
    nlp(sun_txt)
|