mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-03 13:14:11 +03:00
* Add PTB file read tests
This commit is contained in:
parent
f2ee9c4feb
commit
bdaddc4103
46
tests/test_read_ptb.py
Normal file
46
tests/test_read_ptb.py
Normal file
|
@@ -0,0 +1,46 @@
|
|||
from spacy.munge import read_ptb
|
||||
|
||||
import pytest
|
||||
|
||||
from os import path
|
||||
|
||||
# Both sample .parse files are looked up in the directory that holds this
# test module, so the tests do not depend on the current working directory.
_TESTS_DIR = path.dirname(__file__)
ptb_loc = path.join(_TESTS_DIR, 'wsj_0001.parse')
file3_loc = path.join(_TESTS_DIR, 'wsj_0003.parse')
|
||||
|
||||
|
||||
@pytest.fixture
def ptb_text():
    """Return the full text of the sample parse file at ``ptb_loc``.

    The file is read inside a ``with`` block so the handle is closed as soon
    as its contents have been read, instead of being left for the garbage
    collector.
    """
    with open(ptb_loc) as file_:
        return file_.read()
|
||||
|
||||
|
||||
@pytest.fixture
def sentence_strings(ptb_text):
    """Split the ``ptb_text`` fixture's contents with ``read_ptb.split``.

    Returns whatever ``read_ptb.split`` produces; the tests in this module
    index the result and treat each item as one bracketed sentence string.
    """
    sentences = read_ptb.split(ptb_text)
    return sentences
|
||||
|
||||
|
||||
def test_split(sentence_strings):
    """The sample file splits into two sentences, each a complete tree."""
    assert len(sentence_strings) == 2
    tree_open = '(TOP (S (NP-SBJ'
    tree_close = '(. .)))'
    # Check both sentences in order: each must begin at the root bracket and
    # finish with the closing punctuation node.
    for index in (0, 1):
        sentence = sentence_strings[index]
        assert sentence.startswith(tree_open)
        assert sentence.endswith(tree_close)
|
||||
|
||||
|
||||
def test_tree_read(sentence_strings):
    """Check the bracket spans ``read_ptb.parse`` returns for sentence 0.

    The sentence under test is presumably "Pierre Vinken , 61 years old ,
    will join the board as a nonexecutive director Nov. 29 ." (confirm
    against the sample file).
    """
    words, brackets = read_ptb.parse(sentence_strings[0])
    assert len(brackets) == 11
    # NOTE(review): the token strings in `words` are never compared with the
    # expected sentence; only len(words) is used. Consider adding that check
    # once the exact token output of read_ptb.parse is confirmed.
    # Each bracket unpacks as (label, start, end); only the offsets are
    # needed for the range checks below.
    starts = [start for _label, start, _end in brackets]
    ends = [end for _label, _start, end in brackets]
    assert min(starts) == 0
    assert max(ends) == len(words)
    # The last bracket is expected to be the one spanning the whole sentence.
    assert brackets[-1] == ('S', 0, len(words))
    assert ('NP-SBJ', 0, 7) in brackets
|
||||
|
||||
|
||||
def test_traces():
    """Parse the first sentence of ``file3_loc`` and check its word count.

    NOTE(review): going by the test name, 36 is presumably the word count
    after ``read_ptb.parse`` has dealt with trace tokens -- confirm against
    the sample file.
    """
    # Fixture functions must not be called directly (pytest raises an error
    # for that), so the text is split with read_ptb.split itself, which is
    # the only thing the sentence_strings fixture does with its input.
    with open(file3_loc) as file_:
        text = file_.read()
    sent_strings = read_ptb.split(text)
    words, _brackets = read_ptb.parse(sent_strings[0])
    assert len(words) == 36
|
Loading…
Reference in New Issue
Block a user