mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 05:01:02 +03:00 
			
		
		
		
	* Add PTB file read tests
This commit is contained in:
		
							parent
							
								
									f2ee9c4feb
								
							
						
					
					
						commit
						bdaddc4103
					
				
							
								
								
									
										46
									
								
								tests/test_read_ptb.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								tests/test_read_ptb.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,46 @@ | |||
| from spacy.munge import read_ptb | ||||
| 
 | ||||
| import pytest | ||||
| 
 | ||||
| from os import path | ||||
| 
 | ||||
| ptb_loc = path.join(path.dirname(__file__), 'wsj_0001.parse') | ||||
| file3_loc = path.join(path.dirname(__file__), 'wsj_0003.parse') | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture | ||||
| def ptb_text(): | ||||
|     return open(path.join(ptb_loc)).read() | ||||
| 
 | ||||
| 
 | ||||
| @pytest.fixture | ||||
| def sentence_strings(ptb_text): | ||||
|     return read_ptb.split(ptb_text) | ||||
| 
 | ||||
| 
 | ||||
| def test_split(sentence_strings): | ||||
|     assert len(sentence_strings) == 2 | ||||
|     assert sentence_strings[0].startswith('(TOP (S (NP-SBJ') | ||||
|     assert sentence_strings[0].endswith('(. .)))') | ||||
|     assert sentence_strings[1].startswith('(TOP (S (NP-SBJ') | ||||
|     assert sentence_strings[1].endswith('(. .)))') | ||||
| 
 | ||||
| 
 | ||||
| def test_tree_read(sentence_strings): | ||||
|     words, brackets = read_ptb.parse(sentence_strings[0]) | ||||
|     assert len(brackets) == 11 | ||||
|     string = ("Pierre Vinken , 61 years old , will join the board as a nonexecutive " | ||||
|               "director Nov. 29 .") | ||||
|     word_strings = string.split() | ||||
|     starts = [s for l, s, e in brackets] | ||||
|     ends = [e for l, s, e in brackets] | ||||
|     assert min(starts) == 0 | ||||
|     assert max(ends) == len(words) | ||||
|     assert brackets[-1] == ('S', 0, len(words)) | ||||
|     assert ('NP-SBJ', 0, 7) in brackets | ||||
| 
 | ||||
| 
 | ||||
| def test_traces(): | ||||
|     sent_strings = sentence_strings(open(file3_loc).read()) | ||||
|     words, brackets = read_ptb.parse(sent_strings[0]) | ||||
|     assert len(words) == 36 | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user