diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 6a1d74934..06e9be15b 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -335,7 +335,7 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg):
             )
         )
 
-        n_words = sum(len(ex.doc) for ex in dev_examples)
+        n_words = sum(len(ex.predicted) for ex in dev_examples)
         start_time = timer()
 
         if optimizer.averages:
@@ -466,7 +466,7 @@ def train_while_improving(
 
 def subdivide_batch(batch, accumulate_gradient):
     batch = list(batch)
-    batch.sort(key=lambda eg: len(eg.doc))
+    batch.sort(key=lambda eg: len(eg.predicted))
     sub_len = len(batch) // accumulate_gradient
     start = 0
     for i in range(accumulate_gradient):
diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx
index e90d8a42a..7ddc59cda 100644
--- a/spacy/gold/example.pyx
+++ b/spacy/gold/example.pyx
@@ -192,10 +192,6 @@ cdef class Example:
         def __get__(self):
             return self.x.text
 
-    property doc:
-        def __get__(self):
-            return self.x
-
     def __str__(self):
         return str(self.to_dict())
 
diff --git a/spacy/language.py b/spacy/language.py
index b9a84e1bb..01a31400a 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -731,7 +731,7 @@ class Language(object):
             scorer = Scorer(pipeline=self.pipeline)
         if component_cfg is None:
             component_cfg = {}
-        docs = (eg.predicted for eg in examples)
+        docs = list(eg.predicted for eg in examples)
         for name, pipe in self.pipeline:
             kwargs = component_cfg.get(name, {})
             kwargs.setdefault("batch_size", batch_size)
@@ -739,7 +739,7 @@ class Language(object):
                 docs = _pipe(docs, pipe, kwargs)
             else:
                 docs = pipe.pipe(docs, **kwargs)
-        for doc, eg in zip(docs, examples):
+        for i, (doc, eg) in enumerate(zip(docs, examples)):
             if verbose:
                 print(doc)
             eg.predicted = doc
diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx
index d95d6766a..8bdfa0941 100644
--- a/spacy/pipeline/pipes.pyx
+++ b/spacy/pipeline/pipes.pyx
@@ -1107,7 +1107,7 @@ class EntityLinker(Pipe):
 
         for eg in examples:
             kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
-            for ent in eg.doc.ents:
+            for ent in eg.predicted.ents:
                 kb_id = kb_ids[ent.start]  # KB ID of the first token is the same as the whole span
                 if kb_id:
                     try:
@@ -1138,7 +1138,7 @@ class EntityLinker(Pipe):
         entity_encodings = []
         for eg in examples:
             kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
-            for ent in eg.doc.ents:
+            for ent in eg.predicted.ents:
                 kb_id = kb_ids[ent.start]
                 if kb_id:
                     entity_encoding = self.kb.get_vector(kb_id)
@@ -1158,7 +1158,7 @@ class EntityLinker(Pipe):
         cats = []
         for eg in examples:
             kb_ids = eg.get_aligned("ENT_KB_ID", as_string=True)
-            for ent in eg.doc.ents:
+            for ent in eg.predicted.ents:
                 kb_id = kb_ids[ent.start]
                 if kb_id:
                     cats.append([1.0])
diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py
index 75654145b..047cf5caa 100644
--- a/spacy/pipeline/tok2vec.py
+++ b/spacy/pipeline/tok2vec.py
@@ -93,7 +93,7 @@ class Tok2Vec(Pipe):
         """
         if losses is None:
             losses = {}
-        docs = [eg.doc for eg in examples]
+        docs = [eg.predicted for eg in examples]
         if isinstance(docs, Doc):
            docs = [docs]
         set_dropout_rate(self.model, drop)