Allow Language.update to take unicode text for docs and dicts of GoldParse arguments for golds

This commit is contained in:
Matthew Honnibal 2017-11-06 22:07:38 +01:00
parent 1831dbd065
commit 45e0617e61

View File

@@ -17,7 +17,8 @@ from .vocab import Vocab
from .lemmatizer import Lemmatizer from .lemmatizer import Lemmatizer
from .pipeline import DependencyParser, Tensorizer, Tagger, EntityRecognizer from .pipeline import DependencyParser, Tensorizer, Tagger, EntityRecognizer
from .pipeline import SimilarityHook, TextCategorizer, SentenceSegmenter from .pipeline import SimilarityHook, TextCategorizer, SentenceSegmenter
-from .compat import json_dumps, izip
+from .compat import json_dumps, izip, basestring_
from .gold import GoldParse
from .scorer import Scorer from .scorer import Scorer
from ._ml import link_vectors_to_models, create_default_optimizer from ._ml import link_vectors_to_models, create_default_optimizer
from .attrs import IS_STOP from .attrs import IS_STOP
@@ -377,8 +378,21 @@ class Language(object):
return return
if sgd is None: if sgd is None:
if self._optimizer is None: if self._optimizer is None:
-self._optimizer = Adam(Model.ops, 0.001)
+self._optimizer = create_default_optimizer(Model.ops)
sgd = self._optimizer sgd = self._optimizer
# Allow dict of args to GoldParse, instead of GoldParse objects.
gold_objs = []
doc_objs = []
for doc, gold in zip(docs, golds):
if isinstance(doc, basestring_):
doc = self.make_doc(doc)
if not isinstance(gold, GoldParse):
gold = GoldParse(doc, **gold)
doc_objs.append(doc)
gold_objs.append(gold)
golds = gold_objs
docs = doc_objs
grads = {} grads = {}
def get_grads(W, dW, key=None): def get_grads(W, dW, key=None):