Clarify gold.pyx slightly

This commit is contained in:
Matthew Honnibal 2017-06-03 13:28:52 -05:00
parent 43353b5413
commit e62f46d39f

View File

@@ -211,7 +211,7 @@ class GoldCorpus(object):
def dev_docs(self, nlp, gold_preproc=False): def dev_docs(self, nlp, gold_preproc=False):
gold_docs = self.iter_gold_docs(nlp, self.dev_tuples, gold_preproc) gold_docs = self.iter_gold_docs(nlp, self.dev_tuples, gold_preproc)
gold_docs = nlp.preprocess_gold(gold_docs) #gold_docs = nlp.preprocess_gold(gold_docs)
yield from gold_docs yield from gold_docs
@classmethod @classmethod
@@ -226,7 +226,7 @@ class GoldCorpus(object):
gold_preproc) gold_preproc)
golds = cls._make_golds(docs, paragraph_tuples) golds = cls._make_golds(docs, paragraph_tuples)
for doc, gold in zip(docs, golds): for doc, gold in zip(docs, golds):
if not max_length or len(doc) < max_length: if (not max_length) or len(doc) < max_length:
yield doc, gold yield doc, gold
@classmethod @classmethod
@@ -234,17 +234,17 @@ class GoldCorpus(object):
if raw_text is not None: if raw_text is not None:
return [nlp.make_doc(raw_text)] return [nlp.make_doc(raw_text)]
else: else:
return [Doc(nlp.vocab, words=sent_tuples[0][1]) return [Doc(nlp.vocab, words=sent_tuples[1])
for sent_tuples in paragraph_tuples] for (sent_tuples, brackets) in paragraph_tuples]
@classmethod @classmethod
def _make_golds(cls, docs, paragraph_tuples): def _make_golds(cls, docs, paragraph_tuples):
assert len(docs) == len(paragraph_tuples)
if len(docs) == 1: if len(docs) == 1:
return [GoldParse.from_annot_tuples(docs[0], sent_tuples[0]) return [GoldParse.from_annot_tuples(docs[0], paragraph_tuples[0][0])]
for sent_tuples in paragraph_tuples]
else: else:
return [GoldParse.from_annot_tuples(doc, sent_tuples[0]) return [GoldParse.from_annot_tuples(doc, sent_tuples)
for doc, sent_tuples in zip(docs, paragraph_tuples)] for doc, (sent_tuples, brackets) in zip(docs, paragraph_tuples)]
@staticmethod @staticmethod
def walk_corpus(path): def walk_corpus(path):