mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-26 05:31:15 +03:00 
			
		
		
		
	* Work around get_lex_attr bug introduced during German parsing
This commit is contained in:
		
							parent
							
								
									bc3c8d8adf
								
							
						
					
					
						commit
						35214053fd
					
				|  | @ -14,6 +14,7 @@ from spacy.syntax.parser import Parser | ||||||
| from spacy.syntax.arc_eager import ArcEager | from spacy.syntax.arc_eager import ArcEager | ||||||
| from spacy.syntax.parser import get_templates | from spacy.syntax.parser import get_templates | ||||||
| from spacy.scorer import Scorer | from spacy.scorer import Scorer | ||||||
|  | import spacy.attrs | ||||||
| 
 | 
 | ||||||
| from spacy.language import Language | from spacy.language import Language | ||||||
| 
 | 
 | ||||||
|  | @ -47,6 +48,7 @@ class TreebankParser(object): | ||||||
|     @classmethod |     @classmethod | ||||||
|     def from_dir(cls, tag_map, model_dir): |     def from_dir(cls, tag_map, model_dir): | ||||||
|         vocab = Vocab(tag_map=tag_map, get_lex_attr=Language.default_lex_attrs()) |         vocab = Vocab(tag_map=tag_map, get_lex_attr=Language.default_lex_attrs()) | ||||||
|  |         vocab.get_lex_attr[spacy.attrs.LANG] = lambda _: 0 | ||||||
|         tokenizer = Tokenizer(vocab, {}, None, None, None) |         tokenizer = Tokenizer(vocab, {}, None, None, None) | ||||||
|         tagger = Tagger.blank(vocab, TAGGER_TEMPLATES) |         tagger = Tagger.blank(vocab, TAGGER_TEMPLATES) | ||||||
| 
 | 
 | ||||||
|  | @ -99,7 +101,7 @@ def read_conllx(loc): | ||||||
|     for sent in text.strip().split('\n\n'): |     for sent in text.strip().split('\n\n'): | ||||||
|         lines = sent.strip().split('\n') |         lines = sent.strip().split('\n') | ||||||
|         if lines: |         if lines: | ||||||
|             if lines[0].startswith('#'): |             while lines[0].startswith('#'): | ||||||
|                 lines.pop(0) |                 lines.pop(0) | ||||||
|             tokens = [] |             tokens = [] | ||||||
|             for line in lines: |             for line in lines: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user