* Merge train.py

This commit is contained in:
Matthew Honnibal 2015-02-17 22:02:09 -05:00
parent 2e3dc3dfe2
commit 3a302ae6f2

View File

@@ -61,8 +61,8 @@ def read_docparse_gold(file_):
tags = [] tags = []
ids = [] ids = []
lines = sent_str.strip().split('\n') lines = sent_str.strip().split('\n')
raw_text = lines.pop(0) raw_text = lines.pop(0).strip()
tok_text = lines.pop(0) tok_text = lines.pop(0).strip()
for i, line in enumerate(lines): for i, line in enumerate(lines):
id_, word, pos_string, head_idx, label = _parse_line(line) id_, word, pos_string, head_idx, label = _parse_line(line)
if label == 'root': if label == 'root':
@@ -234,6 +234,7 @@ def evaluate(Language, dev_loc, model_dir, gold_preproc=False):
skipped = 0 skipped = 0
loss = 0 loss = 0
with codecs.open(dev_loc, 'r', 'utf8') as file_: with codecs.open(dev_loc, 'r', 'utf8') as file_:
#paragraphs = read_tokenized_gold(file_)
paragraphs = read_docparse_gold(file_) paragraphs = read_docparse_gold(file_)
for tokens, tag_strs, heads, labels in iter_data(paragraphs, nlp.tokenizer, for tokens, tag_strs, heads, labels in iter_data(paragraphs, nlp.tokenizer,
gold_preproc=gold_preproc): gold_preproc=gold_preproc):
@@ -241,11 +242,7 @@ def evaluate(Language, dev_loc, model_dir, gold_preproc=False):
nlp.tagger(tokens) nlp.tagger(tokens)
nlp.parser(tokens) nlp.parser(tokens)
for i, token in enumerate(tokens): for i, token in enumerate(tokens):
try:
pos_corr += token.tag_ == tag_strs[i] pos_corr += token.tag_ == tag_strs[i]
except:
print i, token.orth_, token.tag
raise
n_tokens += 1 n_tokens += 1
if heads[i] is None: if heads[i] is None:
skipped += 1 skipped += 1