mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-27 20:33:42 +03:00
Revert tagger.add_label() changes, to fix model
This commit is contained in:
parent
f5855e539b
commit
b84d99b281
|
@ -307,7 +307,6 @@ class Tensorizer(Pipe):
|
||||||
return tokvecs, bp_tokvecs
|
return tokvecs, bp_tokvecs
|
||||||
|
|
||||||
def get_loss(self, docs, golds, scores):
|
def get_loss(self, docs, golds, scores):
|
||||||
# TODO: implement
|
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def begin_training(self, gold_tuples=tuple(), pipeline=None):
|
def begin_training(self, gold_tuples=tuple(), pipeline=None):
|
||||||
|
@ -336,11 +335,7 @@ class Tagger(Pipe):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def labels(self):
|
def labels(self):
|
||||||
return self.cfg.setdefault('tag_names', [])
|
return self.vocab.morphology.tag_names
|
||||||
|
|
||||||
@labels.setter
|
|
||||||
def labels(self, value):
|
|
||||||
self.cfg['tag_names'] = value
|
|
||||||
|
|
||||||
def __call__(self, doc):
|
def __call__(self, doc):
|
||||||
tags = self.predict([doc])
|
tags = self.predict([doc])
|
||||||
|
@ -369,7 +364,6 @@ class Tagger(Pipe):
|
||||||
cdef Doc doc
|
cdef Doc doc
|
||||||
cdef int idx = 0
|
cdef int idx = 0
|
||||||
cdef Vocab vocab = self.vocab
|
cdef Vocab vocab = self.vocab
|
||||||
tags = list(self.labels)
|
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
doc_tag_ids = batch_tag_ids[i]
|
doc_tag_ids = batch_tag_ids[i]
|
||||||
if hasattr(doc_tag_ids, 'get'):
|
if hasattr(doc_tag_ids, 'get'):
|
||||||
|
@ -377,7 +371,7 @@ class Tagger(Pipe):
|
||||||
for j, tag_id in enumerate(doc_tag_ids):
|
for j, tag_id in enumerate(doc_tag_ids):
|
||||||
# Don't clobber preset POS tags
|
# Don't clobber preset POS tags
|
||||||
if doc.c[j].tag == 0 and doc.c[j].pos == 0:
|
if doc.c[j].tag == 0 and doc.c[j].pos == 0:
|
||||||
vocab.morphology.assign_tag(&doc.c[j], tags[tag_id])
|
vocab.morphology.assign_tag_id(&doc.c[j], tag_id)
|
||||||
idx += 1
|
idx += 1
|
||||||
doc.is_tagged = True
|
doc.is_tagged = True
|
||||||
|
|
||||||
|
@ -425,12 +419,9 @@ class Tagger(Pipe):
|
||||||
new_tag_map[tag] = {POS: X}
|
new_tag_map[tag] = {POS: X}
|
||||||
cdef Vocab vocab = self.vocab
|
cdef Vocab vocab = self.vocab
|
||||||
if new_tag_map:
|
if new_tag_map:
|
||||||
new_tag_map.update(orig_tag_map)
|
|
||||||
vocab.morphology = Morphology(vocab.strings, new_tag_map,
|
vocab.morphology = Morphology(vocab.strings, new_tag_map,
|
||||||
vocab.morphology.lemmatizer,
|
vocab.morphology.lemmatizer,
|
||||||
exc=vocab.morphology.exc)
|
exc=vocab.morphology.exc)
|
||||||
for tag in vocab.morphology.tag_names:
|
|
||||||
self.add_label(tag)
|
|
||||||
if self.model is True:
|
if self.model is True:
|
||||||
self.cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
|
self.cfg['pretrained_dims'] = self.vocab.vectors.data.shape[1]
|
||||||
self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
|
self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
|
||||||
|
@ -443,14 +434,15 @@ class Tagger(Pipe):
|
||||||
def add_label(self, label):
|
def add_label(self, label):
|
||||||
if label in self.labels:
|
if label in self.labels:
|
||||||
return 0
|
return 0
|
||||||
if self.model not in (True, False, None):
|
raise NotImplementedError
|
||||||
smaller = self.model._layers[-1]
|
#if self.model not in (True, False, None):
|
||||||
larger = Softmax(len(self.labels)+1, smaller.nI)
|
# smaller = self.model._layers[-1]
|
||||||
copy_array(larger.W[:smaller.nO], smaller.W)
|
# larger = Softmax(len(self.labels)+1, smaller.nI)
|
||||||
copy_array(larger.b[:smaller.nO], smaller.b)
|
# copy_array(larger.W[:smaller.nO], smaller.W)
|
||||||
self.model._layers[-1] = larger
|
# copy_array(larger.b[:smaller.nO], smaller.b)
|
||||||
self.labels.append(label)
|
# self.model._layers[-1] = larger
|
||||||
return 1
|
#self.labels.append(label)
|
||||||
|
#return 1
|
||||||
|
|
||||||
def use_params(self, params):
|
def use_params(self, params):
|
||||||
with self.model.use_params(params):
|
with self.model.use_params(params):
|
||||||
|
@ -484,12 +476,11 @@ class Tagger(Pipe):
|
||||||
self.vocab.strings, tag_map=tag_map,
|
self.vocab.strings, tag_map=tag_map,
|
||||||
lemmatizer=self.vocab.morphology.lemmatizer,
|
lemmatizer=self.vocab.morphology.lemmatizer,
|
||||||
exc=self.vocab.morphology.exc)
|
exc=self.vocab.morphology.exc)
|
||||||
for tag in self.vocab.morphology.tag_names:
|
|
||||||
self.add_label(tag)
|
|
||||||
|
|
||||||
deserialize = OrderedDict((
|
deserialize = OrderedDict((
|
||||||
('vocab', lambda b: self.vocab.from_bytes(b)),
|
('vocab', lambda b: self.vocab.from_bytes(b)),
|
||||||
('tag_map', load_tag_map),
|
('tag_map', load_tag_map),
|
||||||
|
('cfg', lambda b: self.cfg.update(ujson.loads(b))),
|
||||||
('model', lambda b: load_model(b)),
|
('model', lambda b: load_model(b)),
|
||||||
))
|
))
|
||||||
util.from_bytes(bytes_data, deserialize, exclude)
|
util.from_bytes(bytes_data, deserialize, exclude)
|
||||||
|
@ -521,8 +512,6 @@ class Tagger(Pipe):
|
||||||
self.vocab.strings, tag_map=tag_map,
|
self.vocab.strings, tag_map=tag_map,
|
||||||
lemmatizer=self.vocab.morphology.lemmatizer,
|
lemmatizer=self.vocab.morphology.lemmatizer,
|
||||||
exc=self.vocab.morphology.exc)
|
exc=self.vocab.morphology.exc)
|
||||||
for tag in self.vocab.morphology.tag_names:
|
|
||||||
self.add_label(tag)
|
|
||||||
|
|
||||||
deserialize = OrderedDict((
|
deserialize = OrderedDict((
|
||||||
('cfg', lambda p: self.cfg.update(_load_cfg(p))),
|
('cfg', lambda p: self.cfg.update(_load_cfg(p))),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user