Fixed bug in NER training documentation and example: create the doc with nlp.make_doc() before constructing GoldParse

This commit is contained in:
Callum Kift 2017-06-30 09:56:33 +02:00
parent f69ff15089
commit dfaeee1f37
2 changed files with 2 additions and 2 deletions

View File

@ -52,6 +52,7 @@ def train_ner(nlp, train_data, output_dir):
random.shuffle(train_data) random.shuffle(train_data)
loss = 0. loss = 0.
for raw_text, entity_offsets in train_data: for raw_text, entity_offsets in train_data:
doc = nlp.make_doc(raw_text)
gold = GoldParse(doc, entities=entity_offsets) gold = GoldParse(doc, entities=entity_offsets)
# By default, the GoldParse class assumes that the entities # By default, the GoldParse class assumes that the entities
# described by offset are complete, and all other words should # described by offset are complete, and all other words should
@ -63,7 +64,6 @@ def train_ner(nlp, train_data, output_dir):
#for i in range(len(gold.ner)): #for i in range(len(gold.ner)):
#if not gold.ner[i].endswith('ANIMAL'): #if not gold.ner[i].endswith('ANIMAL'):
# gold.ner[i] = '-' # gold.ner[i] = '-'
doc = nlp.make_doc(raw_text)
nlp.tagger(doc) nlp.tagger(doc)
# As of 1.9, spaCy's parser now lets you supply a dropout probability # As of 1.9, spaCy's parser now lets you supply a dropout probability
# This might help the model generalize better from only a few # This might help the model generalize better from only a few

View File

@ -150,8 +150,8 @@ p
for itn in range(20): for itn in range(20):
random.shuffle(train_data) random.shuffle(train_data)
for raw_text, entity_offsets in train_data: for raw_text, entity_offsets in train_data:
gold = GoldParse(doc, entities=entity_offsets)
doc = nlp.make_doc(raw_text) doc = nlp.make_doc(raw_text)
gold = GoldParse(doc, entities=entity_offsets)
nlp.tagger(doc) nlp.tagger(doc)
loss = nlp.entity.update(doc, gold) loss = nlp.entity.update(doc, gold)
nlp.end_training() nlp.end_training()