mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-23 14:33:20 +03:00
Move regression test for #351 to own file
This commit is contained in:
parent
667051375d
commit
59059fed27
16
spacy/tests/regression/test_issue351.py
Normal file
16
spacy/tests/regression/test_issue351.py
Normal file
|
@@ -0,0 +1,16 @@
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
from ...en import English
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def en_tokenizer():
|
||||||
|
return English.Defaults.create_tokenizer()
|
||||||
|
|
||||||
|
|
||||||
|
def test_issue351(en_tokenizer):
|
||||||
|
doc = en_tokenizer(" This is a cat.")
|
||||||
|
assert doc[0].idx == 0
|
||||||
|
assert len(doc[0]) == 3
|
||||||
|
assert doc[1].idx == 3
|
|
@@ -42,35 +42,3 @@ def test_tokenizer_splits_newline_double_space(en_tokenizer, text):
|
||||||
def test_tokenizer_splits_newline_space_wrap(en_tokenizer, text):
|
def test_tokenizer_splits_newline_space_wrap(en_tokenizer, text):
|
||||||
tokens = en_tokenizer(text)
|
tokens = en_tokenizer(text)
|
||||||
assert len(tokens) == 3
|
assert len(tokens) == 3
|
||||||
|
|
||||||
|
|
||||||
def test_leading_space_offsets(en_tokenizer):
|
|
||||||
'''Issue #351
|
|
||||||
# this works
|
|
||||||
|
|
||||||
text1 = u"This is a cat."
|
|
||||||
a = english_spacy(text1)
|
|
||||||
|
|
||||||
tok0 = list(a.sents)[0][0]
|
|
||||||
print tok0, tok0.idx, text1[tok0.idx]
|
|
||||||
|
|
||||||
tok1 = list(a.sents)[0][1]
|
|
||||||
print tok1, tok1.idx, text1[tok1.idx]
|
|
||||||
|
|
||||||
print "=="
|
|
||||||
|
|
||||||
# this does not work
|
|
||||||
|
|
||||||
text2 = u" This is a cat."
|
|
||||||
b = english_spacy(text2)
|
|
||||||
|
|
||||||
tok0 = list(b.sents)[0][0]
|
|
||||||
print tok0, tok0.idx, text2[tok0.idx]
|
|
||||||
|
|
||||||
tok1 = list(b.sents)[0][1]
|
|
||||||
print tok1, tok1.idx, text2[tok1.idx]
|
|
||||||
'''
|
|
||||||
doc = en_tokenizer(u" This is a cat.")
|
|
||||||
assert doc[0].idx == 0
|
|
||||||
assert len(doc[0]) == 3
|
|
||||||
assert doc[1].idx == 3
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user