Remove Example.doc property

Remove Example.doc

Remove Example.doc

Remove Example.doc

Remove Example.doc
This commit is contained in:
Matthew Honnibal 2020-06-21 23:24:22 +02:00
parent 3cc267d600
commit 3354758351
5 changed files with 8 additions and 12 deletions

View File

@@ -335,7 +335,7 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg):
) )
) )
n_words = sum(len(ex.doc) for ex in dev_examples) n_words = sum(len(ex.predicted) for ex in dev_examples)
start_time = timer() start_time = timer()
if optimizer.averages: if optimizer.averages:
@@ -466,7 +466,7 @@ def train_while_improving(
def subdivide_batch(batch, accumulate_gradient): def subdivide_batch(batch, accumulate_gradient):
batch = list(batch) batch = list(batch)
batch.sort(key=lambda eg: len(eg.doc)) batch.sort(key=lambda eg: len(eg.predicted))
sub_len = len(batch) // accumulate_gradient sub_len = len(batch) // accumulate_gradient
start = 0 start = 0
for i in range(accumulate_gradient): for i in range(accumulate_gradient):

View File

@@ -192,10 +192,6 @@ cdef class Example:
def __get__(self): def __get__(self):
return self.x.text return self.x.text
property doc:
def __get__(self):
return self.x
def __str__(self): def __str__(self):
return str(self.to_dict()) return str(self.to_dict())

View File

@@ -731,7 +731,7 @@ class Language(object):
scorer = Scorer(pipeline=self.pipeline) scorer = Scorer(pipeline=self.pipeline)
if component_cfg is None: if component_cfg is None:
component_cfg = {} component_cfg = {}
docs = (eg.predicted for eg in examples) docs = list(eg.predicted for eg in examples)
for name, pipe in self.pipeline: for name, pipe in self.pipeline:
kwargs = component_cfg.get(name, {}) kwargs = component_cfg.get(name, {})
kwargs.setdefault("batch_size", batch_size) kwargs.setdefault("batch_size", batch_size)
@@ -739,7 +739,7 @@ class Language(object):
docs = _pipe(docs, pipe, kwargs) docs = _pipe(docs, pipe, kwargs)
else: else:
docs = pipe.pipe(docs, **kwargs) docs = pipe.pipe(docs, **kwargs)
for doc, eg in zip(docs, examples): for i, (doc, eg) in enumerate(zip(docs, examples)):
if verbose: if verbose:
print(doc) print(doc)
eg.predicted = doc eg.predicted = doc

View File

@@ -1107,7 +1107,7 @@ class EntityLinker(Pipe):
for eg in examples: for eg in examples:
kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True) kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
for ent in eg.doc.ents: for ent in eg.predicted.ents:
kb_id = kb_ids[ent.start] # KB ID of the first token is the same as the whole span kb_id = kb_ids[ent.start] # KB ID of the first token is the same as the whole span
if kb_id: if kb_id:
try: try:
@@ -1138,7 +1138,7 @@ class EntityLinker(Pipe):
entity_encodings = [] entity_encodings = []
for eg in examples: for eg in examples:
kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True) kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
for ent in eg.doc.ents: for ent in eg.predicted.ents:
kb_id = kb_ids[ent.start] kb_id = kb_ids[ent.start]
if kb_id: if kb_id:
entity_encoding = self.kb.get_vector(kb_id) entity_encoding = self.kb.get_vector(kb_id)
@@ -1158,7 +1158,7 @@ class EntityLinker(Pipe):
cats = [] cats = []
for eg in examples: for eg in examples:
kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True) kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
for ent in eg.doc.ents: for ent in eg.predicted.ents:
kb_id = kb_ids[ent.start] kb_id = kb_ids[ent.start]
if kb_id: if kb_id:
cats.append([1.0]) cats.append([1.0])

View File

@@ -93,7 +93,7 @@ class Tok2Vec(Pipe):
""" """
if losses is None: if losses is None:
losses = {} losses = {}
docs = [eg.doc for eg in examples] docs = [eg.predicted for eg in examples]
if isinstance(docs, Doc): if isinstance(docs, Doc):
docs = [docs] docs = [docs]
set_dropout_rate(self.model, drop) set_dropout_rate(self.model, drop)