mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Merge branch 'feature/lemmatizer' of https://github.com/explosion/spaCy into feature/lemmatizer
This commit is contained in:
commit
b6d60d0041
|
@ -350,7 +350,6 @@ def initialize_pipeline(nlp, docs, golds, config, device):
|
||||||
nlp.add_pipe(nlp.create_pipe("tagger", config={"set_morphology": False}))
|
nlp.add_pipe(nlp.create_pipe("tagger", config={"set_morphology": False}))
|
||||||
nlp.add_pipe(nlp.create_pipe("morphologizer"))
|
nlp.add_pipe(nlp.create_pipe("morphologizer"))
|
||||||
nlp.add_pipe(nlp.create_pipe("parser"))
|
nlp.add_pipe(nlp.create_pipe("parser"))
|
||||||
assert not nlp.get_pipe("tagger").set_morphology
|
|
||||||
if config.multitask_tag:
|
if config.multitask_tag:
|
||||||
nlp.parser.add_multitask_objective("tag")
|
nlp.parser.add_multitask_objective("tag")
|
||||||
if config.multitask_sent:
|
if config.multitask_sent:
|
||||||
|
|
|
@ -357,14 +357,6 @@ class Tagger(Pipe):
|
||||||
self.cfg = OrderedDict(sorted(cfg.items()))
|
self.cfg = OrderedDict(sorted(cfg.items()))
|
||||||
self.cfg.setdefault("cnn_maxout_pieces", 2)
|
self.cfg.setdefault("cnn_maxout_pieces", 2)
|
||||||
|
|
||||||
@property
|
|
||||||
def set_morphology(self):
|
|
||||||
return self.cfg.get("set_morphology", True)
|
|
||||||
|
|
||||||
@set_morphology.setter
|
|
||||||
def set_morphology(self, value):
|
|
||||||
self.cfg["set_morphology"] = value
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def labels(self):
|
def labels(self):
|
||||||
return tuple(self.vocab.morphology.tag_names)
|
return tuple(self.vocab.morphology.tag_names)
|
||||||
|
@ -412,6 +404,7 @@ class Tagger(Pipe):
|
||||||
cdef Doc doc
|
cdef Doc doc
|
||||||
cdef int idx = 0
|
cdef int idx = 0
|
||||||
cdef Vocab vocab = self.vocab
|
cdef Vocab vocab = self.vocab
|
||||||
|
assign_morphology = self.cfg.get("set_morphology", True)
|
||||||
for i, doc in enumerate(docs):
|
for i, doc in enumerate(docs):
|
||||||
doc_tag_ids = batch_tag_ids[i]
|
doc_tag_ids = batch_tag_ids[i]
|
||||||
if hasattr(doc_tag_ids, "get"):
|
if hasattr(doc_tag_ids, "get"):
|
||||||
|
@ -419,7 +412,7 @@ class Tagger(Pipe):
|
||||||
for j, tag_id in enumerate(doc_tag_ids):
|
for j, tag_id in enumerate(doc_tag_ids):
|
||||||
# Don't clobber preset POS tags
|
# Don't clobber preset POS tags
|
||||||
if doc.c[j].tag == 0:
|
if doc.c[j].tag == 0:
|
||||||
if doc.c[j].pos == 0 and self.set_morphology:
|
if doc.c[j].pos == 0 and assign_morphology:
|
||||||
# Don't clobber preset lemmas
|
# Don't clobber preset lemmas
|
||||||
lemma = doc.c[j].lemma
|
lemma = doc.c[j].lemma
|
||||||
vocab.morphology.assign_tag_id(&doc.c[j], tag_id)
|
vocab.morphology.assign_tag_id(&doc.c[j], tag_id)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user