mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Clarify gold.pyx slightly
This commit is contained in:
parent
43353b5413
commit
e62f46d39f
|
@ -211,7 +211,7 @@ class GoldCorpus(object):
|
||||||
|
|
||||||
def dev_docs(self, nlp, gold_preproc=False):
|
def dev_docs(self, nlp, gold_preproc=False):
|
||||||
gold_docs = self.iter_gold_docs(nlp, self.dev_tuples, gold_preproc)
|
gold_docs = self.iter_gold_docs(nlp, self.dev_tuples, gold_preproc)
|
||||||
gold_docs = nlp.preprocess_gold(gold_docs)
|
#gold_docs = nlp.preprocess_gold(gold_docs)
|
||||||
yield from gold_docs
|
yield from gold_docs
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -226,7 +226,7 @@ class GoldCorpus(object):
|
||||||
gold_preproc)
|
gold_preproc)
|
||||||
golds = cls._make_golds(docs, paragraph_tuples)
|
golds = cls._make_golds(docs, paragraph_tuples)
|
||||||
for doc, gold in zip(docs, golds):
|
for doc, gold in zip(docs, golds):
|
||||||
if not max_length or len(doc) < max_length:
|
if (not max_length) or len(doc) < max_length:
|
||||||
yield doc, gold
|
yield doc, gold
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -234,17 +234,17 @@ class GoldCorpus(object):
|
||||||
if raw_text is not None:
|
if raw_text is not None:
|
||||||
return [nlp.make_doc(raw_text)]
|
return [nlp.make_doc(raw_text)]
|
||||||
else:
|
else:
|
||||||
return [Doc(nlp.vocab, words=sent_tuples[0][1])
|
return [Doc(nlp.vocab, words=sent_tuples[1])
|
||||||
for sent_tuples in paragraph_tuples]
|
for (sent_tuples, brackets) in paragraph_tuples]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _make_golds(cls, docs, paragraph_tuples):
|
def _make_golds(cls, docs, paragraph_tuples):
|
||||||
|
assert len(docs) == len(paragraph_tuples)
|
||||||
if len(docs) == 1:
|
if len(docs) == 1:
|
||||||
return [GoldParse.from_annot_tuples(docs[0], sent_tuples[0])
|
return [GoldParse.from_annot_tuples(docs[0], paragraph_tuples[0][0])]
|
||||||
for sent_tuples in paragraph_tuples]
|
|
||||||
else:
|
else:
|
||||||
return [GoldParse.from_annot_tuples(doc, sent_tuples[0])
|
return [GoldParse.from_annot_tuples(doc, sent_tuples)
|
||||||
for doc, sent_tuples in zip(docs, paragraph_tuples)]
|
for doc, (sent_tuples, brackets) in zip(docs, paragraph_tuples)]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def walk_corpus(path):
|
def walk_corpus(path):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user