mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
Move regression test for #351 to own file
This commit is contained in:
parent
667051375d
commit
59059fed27
16
spacy/tests/regression/test_issue351.py
Normal file
16
spacy/tests/regression/test_issue351.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
from __future__ import unicode_literals
|
||||
from ...en import English
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def en_tokenizer():
    """Fixture: an English tokenizer created from ``English.Defaults``."""
    tokenizer = English.Defaults.create_tokenizer()
    return tokenizer
|
||||
|
||||
|
||||
def test_issue351(en_tokenizer):
    """Regression test for issue #351: token offsets after leading spaces.

    The input starts with a run of three spaces. That run is expected to
    come back as the first token (offset 0, length 3), and the token that
    follows it must therefore start at character offset 3.
    """
    # The literal must begin with exactly three spaces: the assertions below
    # pin both the first token's length and the second token's offset to 3.
    doc = en_tokenizer("   This is a cat.")
    assert doc[0].idx == 0
    assert len(doc[0]) == 3
    assert doc[1].idx == 3
|
|
@ -42,35 +42,3 @@ def test_tokenizer_splits_newline_double_space(en_tokenizer, text):
|
|||
def test_tokenizer_splits_newline_space_wrap(en_tokenizer, text):
    """Check that tokenizing ``text`` yields exactly three tokens."""
    assert len(en_tokenizer(text)) == 3
|
||||
|
||||
|
||||
def test_leading_space_offsets(en_tokenizer):
    """Regression test for issue #351: token offsets after leading spaces.

    The original report compared two inputs by printing each token's
    ``idx`` and the character found at ``text[tok.idx]``: for
    u"This is a cat." (no leading whitespace) this was reported to work,
    while for the same sentence preceded by a run of spaces it was
    reported not to work.

    This test pins the expected behavior for the second case: the leading
    run of three spaces is the first token (offset 0, length 3), and the
    token that follows it starts at character offset 3.
    """
    # The literal must begin with exactly three spaces: the assertions below
    # pin both the first token's length and the second token's offset to 3.
    doc = en_tokenizer(u"   This is a cat.")
    assert doc[0].idx == 0
    assert len(doc[0]) == 3
    assert doc[1].idx == 3
|
||||
|
|
Loading…
Reference in New Issue
Block a user