Preserve _SP when filtering tag map in Tagger

To allow "SP" to be used as a regular tag (as in Chinese OntoNotes),
preserve the "_SP" entry, if present, as the reference `SPACE` POS when
the tag map is filtered in `Tagger.begin_training()`.
This commit is contained in:
Adriane Boyd 2020-05-31 19:57:54 +02:00
parent 758a4b154d
commit a005ccd6d7

View File

@@ -526,6 +526,8 @@ class Tagger(Pipe):
new_tag_map[tag] = orig_tag_map[tag]
else:
new_tag_map[tag] = {POS: X}
if "_SP" in orig_tag_map:
new_tag_map["_SP"] = orig_tag_map["_SP"]
cdef Vocab vocab = self.vocab
if new_tag_map:
vocab.morphology = Morphology(vocab.strings, new_tag_map,