mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	fix test_cli
This commit is contained in:
		
							parent
							
								
									f7ad8e8c83
								
							
						
					
					
						commit
						2d9f406188
					
				|  | @ -4,7 +4,6 @@ import random | ||||||
| import warnings | import warnings | ||||||
| import srsly | import srsly | ||||||
| import spacy | import spacy | ||||||
| from spacy.gold import GoldParse |  | ||||||
| from spacy.util import minibatch, compounding | from spacy.util import minibatch, compounding | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -19,7 +19,7 @@ from ml_datasets import loaders | ||||||
| import spacy | import spacy | ||||||
| from spacy import util | from spacy import util | ||||||
| from spacy.util import minibatch, compounding | from spacy.util import minibatch, compounding | ||||||
| from spacy.gold import Example, GoldParse | from spacy.gold import Example | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @plac.annotations( | @plac.annotations( | ||||||
|  |  | ||||||
|  | @ -12,7 +12,6 @@ def conllu2json( | ||||||
|     input_data, |     input_data, | ||||||
|     n_sents=10, |     n_sents=10, | ||||||
|     append_morphology=False, |     append_morphology=False, | ||||||
|     lang=None, |  | ||||||
|     ner_map=None, |     ner_map=None, | ||||||
|     merge_subtokens=False, |     merge_subtokens=False, | ||||||
|     no_print=False, |     no_print=False, | ||||||
|  | @ -41,10 +40,10 @@ def conllu2json( | ||||||
|     ) |     ) | ||||||
|     has_ner_tags = has_ner(input_data, MISC_NER_PATTERN) |     has_ner_tags = has_ner(input_data, MISC_NER_PATTERN) | ||||||
|     for i, example in enumerate(conll_data): |     for i, example in enumerate(conll_data): | ||||||
|         raw += example.predicted.text |         raw += example.text | ||||||
|         sentences.append( |         sentences.append( | ||||||
|             generate_sentence( |             generate_sentence( | ||||||
|                 example, |                 example.to_dict(), | ||||||
|                 has_ner_tags, |                 has_ner_tags, | ||||||
|                 MISC_NER_PATTERN, |                 MISC_NER_PATTERN, | ||||||
|                 ner_map=ner_map, |                 ner_map=ner_map, | ||||||
|  | @ -145,21 +144,21 @@ def get_entities(lines, tag_pattern, ner_map=None): | ||||||
|     return iob_to_biluo(iob) |     return iob_to_biluo(iob) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def generate_sentence(token_annotation, has_ner_tags, tag_pattern, ner_map=None): | def generate_sentence(example_dict, has_ner_tags, tag_pattern, ner_map=None): | ||||||
|     sentence = {} |     sentence = {} | ||||||
|     tokens = [] |     tokens = [] | ||||||
|     for i, id_ in enumerate(token_annotation.ids): |     for i, id_ in enumerate(example_dict["token_annotation"]["ids"]): | ||||||
|         token = {} |         token = {} | ||||||
|         token["id"] = id_ |         token["id"] = id_ | ||||||
|         token["orth"] = token_annotation.get_word(i) |         token["orth"] = example_dict["token_annotation"]["words"][i] | ||||||
|         token["tag"] = token_annotation.get_tag(i) |         token["tag"] = example_dict["token_annotation"]["tags"][i] | ||||||
|         token["pos"] = token_annotation.get_pos(i) |         token["pos"] = example_dict["token_annotation"]["pos"][i] | ||||||
|         token["lemma"] = token_annotation.get_lemma(i) |         token["lemma"] = example_dict["token_annotation"]["lemmas"][i] | ||||||
|         token["morph"] = token_annotation.get_morph(i) |         token["morph"] = example_dict["token_annotation"]["morphs"][i] | ||||||
|         token["head"] = token_annotation.get_head(i) - id_ |         token["head"] = example_dict["token_annotation"]["heads"][i] - i | ||||||
|         token["dep"] = token_annotation.get_dep(i) |         token["dep"] = example_dict["token_annotation"]["deps"][i] | ||||||
|         if has_ner_tags: |         if has_ner_tags: | ||||||
|             token["ner"] = token_annotation.get_entity(i) |             token["ner"] = example_dict["doc_annotation"]["entities"][i] | ||||||
|         tokens.append(token) |         tokens.append(token) | ||||||
|     sentence["tokens"] = tokens |     sentence["tokens"] = tokens | ||||||
|     return sentence |     return sentence | ||||||
|  | @ -251,6 +250,7 @@ def example_from_conllu_sentence( | ||||||
|     for i in range(len(doc)): |     for i in range(len(doc)): | ||||||
|         doc[i].tag_ = tags[i] |         doc[i].tag_ = tags[i] | ||||||
|         doc[i].pos_ = poses[i] |         doc[i].pos_ = poses[i] | ||||||
|  |         doc[i].morph_ = morphs[i] | ||||||
|         doc[i].dep_ = deps[i] |         doc[i].dep_ = deps[i] | ||||||
|         doc[i].lemma_ = lemmas[i] |         doc[i].lemma_ = lemmas[i] | ||||||
|         doc[i].head = doc[heads[i]] |         doc[i].head = doc[heads[i]] | ||||||
|  | @ -267,14 +267,26 @@ def example_from_conllu_sentence( | ||||||
|         doc = merge_conllu_subtokens(lines, doc) |         doc = merge_conllu_subtokens(lines, doc) | ||||||
| 
 | 
 | ||||||
|     # create Example from custom Doc annotation |     # create Example from custom Doc annotation | ||||||
|     words, spaces = [], [] |     words, spaces, tags, morphs, lemmas = [], [], [], [], [] | ||||||
|     for i, t in enumerate(doc): |     for i, t in enumerate(doc): | ||||||
|         words.append(t._.merged_orth) |         words.append(t._.merged_orth) | ||||||
|  |         lemmas.append(t._.merged_lemma) | ||||||
|         spaces.append(t._.merged_spaceafter) |         spaces.append(t._.merged_spaceafter) | ||||||
|  |         morphs.append(t._.merged_morph) | ||||||
|         if append_morphology and t._.merged_morph: |         if append_morphology and t._.merged_morph: | ||||||
|             t.tag_ = t.tag_ + "__" + t._.merged_morph |             tags.append(t.tag_ + "__" + t._.merged_morph) | ||||||
|  |         else: | ||||||
|  |             tags.append(t.tag_) | ||||||
| 
 | 
 | ||||||
|     return Example(predicted=Doc(vocab, words=words, spaces=spaces), reference=doc) |     doc_x = Doc(vocab, words=words, spaces=spaces) | ||||||
|  |     ref_dict = Example(doc_x, reference=doc).to_dict() | ||||||
|  |     ref_dict["words"] = words | ||||||
|  |     ref_dict["lemmas"] = lemmas | ||||||
|  |     ref_dict["spaces"] = spaces | ||||||
|  |     ref_dict["tags"] = tags | ||||||
|  |     ref_dict["morphs"] = morphs | ||||||
|  |     example = Example.from_dict(doc_x, ref_dict) | ||||||
|  |     return example | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def merge_conllu_subtokens(lines, doc): | def merge_conllu_subtokens(lines, doc): | ||||||
|  |  | ||||||
|  | @ -167,8 +167,15 @@ cdef class Example: | ||||||
|             ) |             ) | ||||||
|         return output |         return output | ||||||
| 
 | 
 | ||||||
|     def text(self): |     property text: | ||||||
|         return self.x.text |         def __get__(self): | ||||||
|  |             return self.x.text | ||||||
|  | 
 | ||||||
|  |     def __str__(self): | ||||||
|  |         return str(self.to_dict()) | ||||||
|  | 
 | ||||||
|  |     def __repr__(self): | ||||||
|  |         return str(self.to_dict()) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _annot2array(vocab, tok_annot, doc_annot): | def _annot2array(vocab, tok_annot, doc_annot): | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user