Update the TAG_MAP

This commit is contained in:
Paul O'Leary McCann 2017-08-22 00:02:55 +09:00
parent 53e17296e9
commit adfd987316
2 changed files with 2 additions and 4 deletions

View File

@@ -87,9 +87,6 @@ class JapaneseTagger(object):
# 1. get raw JP tags
# 2. add features to tags as necessary for UD
# TODO: if the text has been tokenized, this info is already available
# How to set the data when tokenizing or save it for the tagger to find?
dtokens = detailed_tokens(self.tokenizer, tokens.text)
rawtags = list(map(resolve_pos, dtokens))
self.tagger.tag_from_strings(tokens, rawtags)

View File

@@ -81,7 +81,8 @@ TAG_MAP = {
"名詞,普通名詞,助数詞可能,*":{POS: NOUN}, # counter / unit
"名詞,普通名詞,副詞可能,*":{POS: NOUN},
"連体詞,*,*,*":{POS: ADJ}, # XXX note この、その etc. should be DET
"連体詞,*,*,*":{POS: ADJ}, # XXX this has exceptions based on literal token
"連体詞,*,*,*,ADJ":{POS: ADJ},
"連体詞,*,*,*,PRON":{POS: PRON},
"連体詞,*,*,*,DET":{POS: DET},
}