mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Merge pull request #6201 from svlandeg/fix/error_nr
This commit is contained in:
		
						commit
						6260fa3c10
					
				| 
						 | 
					@ -456,10 +456,10 @@ class Errors:
 | 
				
			||||||
            "issue tracker: http://github.com/explosion/spaCy/issues")
 | 
					            "issue tracker: http://github.com/explosion/spaCy/issues")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # TODO: fix numbering after merging develop into master
 | 
					    # TODO: fix numbering after merging develop into master
 | 
				
			||||||
    E092 = ("The sentence-per-line IOB/IOB2 file is not formatted correctly. "
 | 
					    E902 = ("The sentence-per-line IOB/IOB2 file is not formatted correctly. "
 | 
				
			||||||
            "Try checking whitespace and delimiters. See "
 | 
					            "Try checking whitespace and delimiters. See "
 | 
				
			||||||
            "https://nightly.spacy.io/api/cli#convert")
 | 
					            "https://nightly.spacy.io/api/cli#convert")
 | 
				
			||||||
    E093 = ("The token-per-line NER file is not formatted correctly. Try checking "
 | 
					    E903 = ("The token-per-line NER file is not formatted correctly. Try checking "
 | 
				
			||||||
            "whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert")
 | 
					            "whitespace and delimiters. See https://nightly.spacy.io/api/cli#convert")
 | 
				
			||||||
    E904 = ("Cannot initialize StaticVectors layer: nO dimension unset. This "
 | 
					    E904 = ("Cannot initialize StaticVectors layer: nO dimension unset. This "
 | 
				
			||||||
            "dimension refers to the output width, after the linear projection "
 | 
					            "dimension refers to the output width, after the linear projection "
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -103,7 +103,7 @@ def conll_ner_to_docs(
 | 
				
			||||||
            lines = [line.strip() for line in conll_sent.split("\n") if line.strip()]
 | 
					            lines = [line.strip() for line in conll_sent.split("\n") if line.strip()]
 | 
				
			||||||
            cols = list(zip(*[line.split() for line in lines]))
 | 
					            cols = list(zip(*[line.split() for line in lines]))
 | 
				
			||||||
            if len(cols) < 2:
 | 
					            if len(cols) < 2:
 | 
				
			||||||
                raise ValueError(Errors.E093)
 | 
					                raise ValueError(Errors.E903)
 | 
				
			||||||
            length = len(cols[0])
 | 
					            length = len(cols[0])
 | 
				
			||||||
            words.extend(cols[0])
 | 
					            words.extend(cols[0])
 | 
				
			||||||
            sent_starts.extend([True] + [False] * (length - 1))
 | 
					            sent_starts.extend([True] + [False] * (length - 1))
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -46,7 +46,7 @@ def read_iob(raw_sents, vocab, n_sents):
 | 
				
			||||||
                sent_words, sent_iob = zip(*sent_tokens)
 | 
					                sent_words, sent_iob = zip(*sent_tokens)
 | 
				
			||||||
                sent_tags = ["-"] * len(sent_words)
 | 
					                sent_tags = ["-"] * len(sent_words)
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                raise ValueError(Errors.E092)
 | 
					                raise ValueError(Errors.E902)
 | 
				
			||||||
            words.extend(sent_words)
 | 
					            words.extend(sent_words)
 | 
				
			||||||
            tags.extend(sent_tags)
 | 
					            tags.extend(sent_tags)
 | 
				
			||||||
            iob.extend(sent_iob)
 | 
					            iob.extend(sent_iob)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user