mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-13 18:10:35 +03:00
* Add PTB file read tests
This commit is contained in:
parent
f2ee9c4feb
commit
bdaddc4103
46
tests/test_read_ptb.py
Normal file
46
tests/test_read_ptb.py
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
from spacy.munge import read_ptb
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from os import path
|
||||||
|
|
||||||
|
# Both sample parse files live next to this test module.
_HERE = path.dirname(__file__)
ptb_loc = path.join(_HERE, 'wsj_0001.parse')
file3_loc = path.join(_HERE, 'wsj_0003.parse')
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def ptb_text():
    """Return the full text of the sample file at ``ptb_loc``.

    The file is read inside a ``with`` block so the handle is closed as soon
    as the contents have been read, instead of being left open until the
    file object happens to be garbage-collected.
    """
    # ``ptb_loc`` is already a complete path, so no further joining is needed.
    with open(ptb_loc) as file_:
        return file_.read()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def sentence_strings(ptb_text):
    """Return ``read_ptb.split`` applied to the sample file's text.

    ``test_split`` in this module expects the result to hold one bracketed
    tree string per sentence.
    """
    per_sentence = read_ptb.split(ptb_text)
    return per_sentence
|
||||||
|
|
||||||
|
|
||||||
|
def test_split(sentence_strings):
    """Check that the sample text splits into two complete sentence trees."""
    assert len(sentence_strings) == 2
    # Every sentence must begin at the TOP node and end with the closing
    # brackets that follow the final full stop.
    for tree in sentence_strings:
        assert tree.startswith('(TOP (S (NP-SBJ')
        assert tree.endswith('(. .)))')
|
||||||
|
|
||||||
|
|
||||||
|
def test_tree_read(sentence_strings):
    """Parse the first sample sentence and check its labelled brackets.

    The sentence under test, tokenised, reads: "Pierre Vinken , 61 years
    old , will join the board as a nonexecutive director Nov. 29 ."

    NOTE(review): the original test built this token list but never compared
    it with ``words``; confirm the element type returned by
    ``read_ptb.parse`` before adding such an assertion.
    """
    words, brackets = read_ptb.parse(sentence_strings[0])
    assert len(brackets) == 11

    # Each bracket unpacks as a (label, start, end) triple.
    starts = [start for _, start, _ in brackets]
    ends = [end for _, _, end in brackets]
    assert min(starts) == 0
    assert max(ends) == len(words)

    # The last bracket is the S node spanning the whole sentence.
    assert brackets[-1] == ('S', 0, len(words))
    assert ('NP-SBJ', 0, 7) in brackets
|
||||||
|
|
||||||
|
|
||||||
|
def test_traces():
    """Check the word count of the first sentence in the ``file3_loc`` sample.

    The text is split with ``read_ptb.split`` directly. The
    ``sentence_strings`` fixture performs the same call, but invoking a
    pytest fixture as a plain function is an error in pytest 4.0 and later.

    NOTE(review): judging by the test name, the expected count of 36
    presumably excludes trace tokens -- confirm against ``read_ptb.parse``.
    """
    # Close the file promptly rather than leaking the handle.
    with open(file3_loc) as file_:
        sent_strings = read_ptb.split(file_.read())
    words, _ = read_ptb.parse(sent_strings[0])
    assert len(words) == 36
|
Loading…
Reference in New Issue
Block a user