mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 10:26:35 +03:00
47 lines
1.3 KiB
Python
47 lines
1.3 KiB
Python
from spacy.munge import read_ptb
|
|
|
|
import pytest
|
|
|
|
from os import path
|
|
|
|
# Sample Penn Treebank parse files are stored next to this test module.
_DATA_DIR = path.dirname(__file__)
ptb_loc = path.join(_DATA_DIR, 'wsj_0001.parse')
file3_loc = path.join(_DATA_DIR, 'wsj_0003.parse')
|
|
|
|
|
|
@pytest.fixture
def ptb_text():
    """Return the entire contents of the file at ``ptb_loc`` as a string."""
    # Use a context manager so the file handle is closed as soon as the
    # text has been read, instead of being left for the garbage collector.
    with open(ptb_loc) as file_:
        return file_.read()
|
|
|
|
|
|
@pytest.fixture
def sentence_strings(ptb_text):
    """Return the result of ``read_ptb.split`` applied to ``ptb_text``."""
    split_sentences = read_ptb.split(ptb_text)
    return split_sentences
|
|
|
|
|
|
def test_split(sentence_strings):
|
|
assert len(sentence_strings) == 2
|
|
assert sentence_strings[0].startswith('(TOP (S (NP-SBJ')
|
|
assert sentence_strings[0].endswith('(. .)))')
|
|
assert sentence_strings[1].startswith('(TOP (S (NP-SBJ')
|
|
assert sentence_strings[1].endswith('(. .)))')
|
|
|
|
|
|
def test_tree_read(sentence_strings):
    """Check the words and brackets parsed from the first sentence.

    ``read_ptb.parse`` is unpacked into ``(words, brackets)``; every bracket
    is unpacked as a ``(label, start, end)`` triple.

    NOTE(review): the first sentence is presumably "Pierre Vinken , 61 years
    old , will join the board as a nonexecutive director Nov. 29 ." -- the
    parsed words are not compared against that text here; confirm the token
    output of ``read_ptb.parse`` before adding such an assertion.
    """
    words, brackets = read_ptb.parse(sentence_strings[0])
    assert len(brackets) == 11
    starts = [start for _label, start, _end in brackets]
    ends = [end for _label, _start, end in brackets]
    # Taken together, the brackets must span from index 0 to len(words).
    assert min(starts) == 0
    assert max(ends) == len(words)
    # The last bracket is the sentence-level 'S' span.
    assert brackets[-1] == ('S', 0, len(words))
    assert ('NP-SBJ', 0, 7) in brackets
|
|
|
|
|
|
def test_traces():
    """Check that the first sentence of the ``file3_loc`` file has 36 words."""
    # Close the file deterministically instead of leaking the handle.
    with open(file3_loc) as file_:
        text = file_.read()
    # Split with read_ptb.split directly (the same call the sentence_strings
    # fixture makes): pytest fixtures cannot be invoked as plain functions,
    # and pytest 4+ raises an error when one is called directly.
    sent_strings = read_ptb.split(text)
    words, _brackets = read_ptb.parse(sent_strings[0])
    assert len(words) == 36
|