mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-01 00:17:44 +03:00 
			
		
		
		
	* Fix Python3 problem in align_raw
This commit is contained in:
		
							parent
							
								
									c609ea18f0
								
							
						
					
					
						commit
						dcafb181b9
					
				|  | @ -1,6 +1,8 @@ | |||
| """Align the raw sentences from Read et al (2012) to the PTB tokenization, | ||||
| outputting as a .json file. Used in bin/prepare_treebank.py | ||||
| """ | ||||
| +from __future__ import unicode_literals | ||||
| +
 | ||||
| import plac | ||||
| from pathlib import Path | ||||
| import json | ||||
|  | @ -84,7 +86,6 @@ def get_alignment(raw_by_para, ptb_by_file): | |||
|     n_skipped = 0 | ||||
|     skips = [] | ||||
|     for (p_id, p_sent_id, raw) in raw_sents: | ||||
| -        #print raw | ||||
|         if ptb_idx >= len(ptb_sents): | ||||
|             n_skipped += 1 | ||||
|             continue | ||||
|  | @ -104,8 +105,8 @@ def get_alignment(raw_by_para, ptb_by_file): | |||
|         output.append((f_id, p_id, f_sent_id, (ptb_id, ''.join(sepped)))) | ||||
|     if n_skipped + len(ptb_sents) != len(raw_sents): | ||||
|         for ptb, raw in skips: | ||||
| -            print ptb | ||||
| -            print raw | ||||
| +            print(ptb) | ||||
| +            print(raw) | ||||
|         raise Exception | ||||
|     return output | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user