mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Fix ud_train.py
This commit is contained in:
		
							parent
							
								
									ae7c728c5f
								
							
						
					
					
						commit
						98dfe5e433
					
				|  | @ -156,13 +156,8 @@ def _make_gold(nlp, text, sent_annots, drop_deps=0.0): | ||||||
|     flat = defaultdict(list) |     flat = defaultdict(list) | ||||||
|     sent_starts = [] |     sent_starts = [] | ||||||
|     for sent in sent_annots: |     for sent in sent_annots: | ||||||
| <<<<<<< HEAD:spacy/cli/ud_train.py |         flat["heads"].extend(len(flat["words"])+head for head in sent["heads"]) | ||||||
|         flat['heads'].extend(len(flat['words'])+head for head in sent['heads']) |         for field in ["words", "tags", "deps", "morphology", "entities", "spaces"]: | ||||||
|         for field in ['words', 'tags', 'deps', 'morphology', 'entities', 'spaces']: |  | ||||||
| ======= |  | ||||||
|         flat["heads"].extend(len(flat["words"]) + head for head in sent["heads"]) |  | ||||||
|         for field in ["words", "tags", "deps", "entities", "spaces"]: |  | ||||||
| >>>>>>> develop:spacy/cli/ud/ud_train.py |  | ||||||
|             flat[field].extend(sent[field]) |             flat[field].extend(sent[field]) | ||||||
|         sent_starts.append(True) |         sent_starts.append(True) | ||||||
|         sent_starts.extend([False] * (len(sent["words"]) - 1)) |         sent_starts.extend([False] * (len(sent["words"]) - 1)) | ||||||
|  | @ -260,55 +255,30 @@ def write_conllu(docs, file_): | ||||||
| 
 | 
 | ||||||
| def print_progress(itn, losses, ud_scores): | def print_progress(itn, losses, ud_scores): | ||||||
|     fields = { |     fields = { | ||||||
| <<<<<<< HEAD:spacy/cli/ud_train.py |  | ||||||
|         'dep_loss': losses.get('parser', 0.0), |  | ||||||
|         'morph_loss': losses.get('morphologizer', 0.0), |  | ||||||
|         'tag_loss': losses.get('tagger', 0.0), |  | ||||||
|         'words': ud_scores['Words'].f1 * 100, |  | ||||||
|         'sents': ud_scores['Sentences'].f1 * 100, |  | ||||||
|         'tags': ud_scores['XPOS'].f1 * 100, |  | ||||||
|         'uas': ud_scores['UAS'].f1 * 100, |  | ||||||
|         'las': ud_scores['LAS'].f1 * 100, |  | ||||||
|         'morph': ud_scores['Feats'].f1 * 100, |  | ||||||
|     } |  | ||||||
|     header = ['Epoch', 'P.Loss', 'M.Loss', 'LAS', 'UAS', 'TAG', 'MORPH', 'SENT', 'WORD'] |  | ||||||
|     if itn == 0: |  | ||||||
|         print('\t'.join(header)) |  | ||||||
|     tpl = '\t'.join(( |  | ||||||
|         '{:d}', |  | ||||||
|         '{dep_loss:.1f}', |  | ||||||
|         '{morph_loss:.1f}', |  | ||||||
|         '{las:.1f}', |  | ||||||
|         '{uas:.1f}', |  | ||||||
|         '{tags:.1f}', |  | ||||||
|         '{morph:.1f}', |  | ||||||
|         '{sents:.1f}', |  | ||||||
|         '{words:.1f}', |  | ||||||
|     )) |  | ||||||
| ======= |  | ||||||
|         "dep_loss": losses.get("parser", 0.0), |         "dep_loss": losses.get("parser", 0.0), | ||||||
|  |         "morph_loss": losses.get("morphologizer", 0.0), | ||||||
|         "tag_loss": losses.get("tagger", 0.0), |         "tag_loss": losses.get("tagger", 0.0), | ||||||
|         "words": ud_scores["Words"].f1 * 100, |         "words": ud_scores["Words"].f1 * 100, | ||||||
|         "sents": ud_scores["Sentences"].f1 * 100, |         "sents": ud_scores["Sentences"].f1 * 100, | ||||||
|         "tags": ud_scores["XPOS"].f1 * 100, |         "tags": ud_scores["XPOS"].f1 * 100, | ||||||
|         "uas": ud_scores["UAS"].f1 * 100, |         "uas": ud_scores["UAS"].f1 * 100, | ||||||
|         "las": ud_scores["LAS"].f1 * 100, |         "las": ud_scores["LAS"].f1 * 100, | ||||||
|  |         "morph": ud_scores["Feats"].f1 * 100, | ||||||
|     } |     } | ||||||
|     header = ["Epoch", "Loss", "LAS", "UAS", "TAG", "SENT", "WORD"] |     header = ["Epoch", "P.Loss", "M.Loss", "LAS", "UAS", "TAG", "MORPH", "SENT", "WORD"] | ||||||
|     if itn == 0: |     if itn == 0: | ||||||
|         print("\t".join(header)) |         print("\t".join(header)) | ||||||
|     tpl = "\t".join( |     tpl = "\t".join(( | ||||||
|         ( |  | ||||||
|         "{:d}", |         "{:d}", | ||||||
|         "{dep_loss:.1f}", |         "{dep_loss:.1f}", | ||||||
|  |         "{morph_loss:.1f}", | ||||||
|         "{las:.1f}", |         "{las:.1f}", | ||||||
|         "{uas:.1f}", |         "{uas:.1f}", | ||||||
|         "{tags:.1f}", |         "{tags:.1f}", | ||||||
|  |         "{morph:.1f}", | ||||||
|         "{sents:.1f}", |         "{sents:.1f}", | ||||||
|         "{words:.1f}", |         "{words:.1f}", | ||||||
|         ) |     )) | ||||||
|     ) |  | ||||||
| >>>>>>> develop:spacy/cli/ud/ud_train.py |  | ||||||
|     print(tpl.format(itn, **fields)) |     print(tpl.format(itn, **fields)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -329,48 +299,26 @@ def get_token_conllu(token, i): | ||||||
|         head = 0 |         head = 0 | ||||||
|     else: |     else: | ||||||
|         head = i + (token.head.i - token.i) + 1 |         head = i + (token.head.i - token.i) + 1 | ||||||
| <<<<<<< HEAD:spacy/cli/ud_train.py |  | ||||||
|     features = token.vocab.morphology.get(token.morph_key) |     features = token.vocab.morphology.get(token.morph_key) | ||||||
|     feat_str = [] |     feat_str = [] | ||||||
|     replacements = {'one': '1', 'two': '2', 'three': '3'} |     replacements = {"one": "1", "two": "2", "three": "3"} | ||||||
|     for feat in features: |     for feat in features: | ||||||
|         if not feat.startswith('begin') and not feat.startswith('end'): |         if not feat.startswith("begin") and not feat.startswith("end"): | ||||||
|             key, value = feat.split('_') |             key, value = feat.split("_") | ||||||
|             value = replacements.get(value, value) |             value = replacements.get(value, value) | ||||||
|             feat_str.append('%s=%s' % (key, value.title())) |             feat_str.append("%s=%s" % (key, value.title())) | ||||||
|     if not feat_str: |     if not feat_str: | ||||||
|         feat_str = '_' |         feat_str = "_" | ||||||
|     else: |     else: | ||||||
|         feat_str = '|'.join(feat_str) |         feat_str = "|".join(feat_str) | ||||||
|     fields = [str(i+1), token.text, token.lemma_, token.pos_, token.tag_, feat_str, |     fields = [str(i+1), token.text, token.lemma_, token.pos_, token.tag_, feat_str, | ||||||
|               str(head), token.dep_.lower(), '_', '_'] |               str(head), token.dep_.lower(), "_", "_"] | ||||||
|     lines.append('\t'.join(fields)) |  | ||||||
|     return '\n'.join(lines) |  | ||||||
| 
 |  | ||||||
| Token.set_extension('get_conllu_lines', method=get_token_conllu) |  | ||||||
| Token.set_extension('begins_fused', default=False) |  | ||||||
| Token.set_extension('inside_fused', default=False) |  | ||||||
| ======= |  | ||||||
|     fields = [ |  | ||||||
|         str(i + 1), |  | ||||||
|         token.text, |  | ||||||
|         token.lemma_, |  | ||||||
|         token.pos_, |  | ||||||
|         token.tag_, |  | ||||||
|         "_", |  | ||||||
|         str(head), |  | ||||||
|         token.dep_.lower(), |  | ||||||
|         "_", |  | ||||||
|         "_", |  | ||||||
|     ] |  | ||||||
|     lines.append("\t".join(fields)) |     lines.append("\t".join(fields)) | ||||||
|     return "\n".join(lines) |     return "\n".join(lines) | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| Token.set_extension("get_conllu_lines", method=get_token_conllu) | Token.set_extension("get_conllu_lines", method=get_token_conllu) | ||||||
| Token.set_extension("begins_fused", default=False) | Token.set_extension("begins_fused", default=False) | ||||||
| Token.set_extension("inside_fused", default=False) | Token.set_extension("inside_fused", default=False) | ||||||
| >>>>>>> develop:spacy/cli/ud/ud_train.py |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| ################## | ################## | ||||||
|  | @ -394,14 +342,9 @@ def load_nlp(corpus, config, vectors=None): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def initialize_pipeline(nlp, docs, golds, config, device): | def initialize_pipeline(nlp, docs, golds, config, device): | ||||||
| <<<<<<< HEAD:spacy/cli/ud_train.py |  | ||||||
|     nlp.add_pipe(nlp.create_pipe('tagger')) |  | ||||||
|     nlp.add_pipe(nlp.create_pipe('morphologizer')) |  | ||||||
|     nlp.add_pipe(nlp.create_pipe('parser')) |  | ||||||
| ======= |  | ||||||
|     nlp.add_pipe(nlp.create_pipe("tagger")) |     nlp.add_pipe(nlp.create_pipe("tagger")) | ||||||
|  |     nlp.add_pipe(nlp.create_pipe("morphologizer")) | ||||||
|     nlp.add_pipe(nlp.create_pipe("parser")) |     nlp.add_pipe(nlp.create_pipe("parser")) | ||||||
| >>>>>>> develop:spacy/cli/ud/ud_train.py |  | ||||||
|     if config.multitask_tag: |     if config.multitask_tag: | ||||||
|         nlp.parser.add_multitask_objective("tag") |         nlp.parser.add_multitask_objective("tag") | ||||||
|     if config.multitask_sent: |     if config.multitask_sent: | ||||||
|  | @ -597,23 +540,12 @@ def main( | ||||||
|         out_path = parses_dir / corpus / "epoch-{i}.conllu".format(i=i) |         out_path = parses_dir / corpus / "epoch-{i}.conllu".format(i=i) | ||||||
|         with nlp.use_params(optimizer.averages): |         with nlp.use_params(optimizer.averages): | ||||||
|             if use_oracle_segments: |             if use_oracle_segments: | ||||||
| <<<<<<< HEAD:spacy/cli/ud_train.py |  | ||||||
|                 parsed_docs, scores = evaluate(nlp, paths.dev.conllu, |                 parsed_docs, scores = evaluate(nlp, paths.dev.conllu, | ||||||
|                                                 paths.dev.conllu, out_path) |                                                 paths.dev.conllu, out_path) | ||||||
|             else: |             else: | ||||||
|                 parsed_docs, scores = evaluate(nlp, paths.dev.text, |                 parsed_docs, scores = evaluate(nlp, paths.dev.text, | ||||||
|                                                 paths.dev.conllu, out_path) |                                                 paths.dev.conllu, out_path) | ||||||
|         print_progress(i, losses, scores) |         print_progress(i, losses, scores) | ||||||
| ======= |  | ||||||
|                 parsed_docs, scores = evaluate( |  | ||||||
|                     nlp, paths.dev.conllu, paths.dev.conllu, out_path |  | ||||||
|                 ) |  | ||||||
|             else: |  | ||||||
|                 parsed_docs, scores = evaluate( |  | ||||||
|                     nlp, paths.dev.text, paths.dev.conllu, out_path |  | ||||||
|                 ) |  | ||||||
|             print_progress(i, losses, scores) |  | ||||||
| >>>>>>> develop:spacy/cli/ud/ud_train.py |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _render_parses(i, to_render): | def _render_parses(i, to_render): | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user