From dfaeee1f37d8b7b614e55cd732c6c89abb9afd92 Mon Sep 17 00:00:00 2001
From: Callum Kift
Date: Fri, 30 Jun 2017 09:56:33 +0200
Subject: [PATCH] fixed bug in training ner documentation and example

---
 examples/training/train_new_entity_type.py | 2 +-
 website/docs/usage/training-ner.jade       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/training/train_new_entity_type.py b/examples/training/train_new_entity_type.py
index 4eae11c75..987ab5859 100644
--- a/examples/training/train_new_entity_type.py
+++ b/examples/training/train_new_entity_type.py
@@ -52,6 +52,7 @@ def train_ner(nlp, train_data, output_dir):
         random.shuffle(train_data)
         loss = 0.
         for raw_text, entity_offsets in train_data:
+            doc = nlp.make_doc(raw_text)
             gold = GoldParse(doc, entities=entity_offsets)
             # By default, the GoldParse class assumes that the entities
             # described by offset are complete, and all other words should
@@ -63,7 +64,6 @@ def train_ner(nlp, train_data, output_dir):
             #for i in range(len(gold.ner)):
             #if not gold.ner[i].endswith('ANIMAL'):
             #    gold.ner[i] = '-'
-            doc = nlp.make_doc(raw_text)
             nlp.tagger(doc)
             # As of 1.9, spaCy's parser now lets you supply a dropout probability
             # This might help the model generalize better from only a few
diff --git a/website/docs/usage/training-ner.jade b/website/docs/usage/training-ner.jade
index 78eb4905e..52eedd21e 100644
--- a/website/docs/usage/training-ner.jade
+++ b/website/docs/usage/training-ner.jade
@@ -150,8 +150,8 @@ p
     for itn in range(20):
         random.shuffle(train_data)
         for raw_text, entity_offsets in train_data:
-            gold = GoldParse(doc, entities=entity_offsets)
             doc = nlp.make_doc(raw_text)
+            gold = GoldParse(doc, entities=entity_offsets)
             nlp.tagger(doc)
             loss = nlp.entity.update(doc, gold)
     nlp.end_training()
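
Note on the fix: GoldParse aligns the character-offset entity annotations against an existing Doc, so nlp.make_doc(raw_text) has to run before GoldParse(doc, entities=entity_offsets) is constructed; in the old ordering, GoldParse was built before the Doc for the current text existed. The snippet below is a minimal sketch of the corrected loop, assuming the spaCy 1.x-era API used in the patched files (spacy.gold.GoldParse, nlp.tagger, nlp.entity.update, nlp.end_training) and an installed 'en' model; the sample sentence, offsets, and 'PERSON' label are illustrative placeholders, not part of the patch.

    # Minimal sketch of the corrected ordering (spaCy 1.x-era API, as in the patch).
    # The training sentence, offsets, and label are illustrative placeholders.
    import random

    import spacy
    from spacy.gold import GoldParse

    nlp = spacy.load('en')

    train_data = [
        ('Who is Shaka Khan?', [(7, 17, 'PERSON')]),
    ]

    for itn in range(20):
        random.shuffle(train_data)
        for raw_text, entity_offsets in train_data:
            doc = nlp.make_doc(raw_text)                    # build the Doc for this text first
            gold = GoldParse(doc, entities=entity_offsets)  # then align the gold entities to that Doc
            nlp.tagger(doc)                                 # tag the Doc before updating the NER
            loss = nlp.entity.update(doc, gold)             # update the entity recognizer
    nlp.end_training()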