mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-29 11:26:28 +03:00
Update the TAG_MAP
This commit is contained in:
parent
53e17296e9
commit
adfd987316
|
@ -87,9 +87,6 @@ class JapaneseTagger(object):
|
||||||
# 1. get raw JP tags
|
# 1. get raw JP tags
|
||||||
# 2. add features to tags as necessary for UD
|
# 2. add features to tags as necessary for UD
|
||||||
|
|
||||||
# TODO: if the text has been tokenized, this info is already available
|
|
||||||
# How to set the data when tokenizing or save it for the tagger to find?
|
|
||||||
|
|
||||||
dtokens = detailed_tokens(self.tokenizer, tokens.text)
|
dtokens = detailed_tokens(self.tokenizer, tokens.text)
|
||||||
rawtags = list(map(resolve_pos, dtokens))
|
rawtags = list(map(resolve_pos, dtokens))
|
||||||
self.tagger.tag_from_strings(tokens, rawtags)
|
self.tagger.tag_from_strings(tokens, rawtags)
|
||||||
|
|
|
@ -81,7 +81,8 @@ TAG_MAP = {
|
||||||
"名詞,普通名詞,助数詞可能,*":{POS: NOUN}, # counter / unit
|
"名詞,普通名詞,助数詞可能,*":{POS: NOUN}, # counter / unit
|
||||||
"名詞,普通名詞,副詞可能,*":{POS: NOUN},
|
"名詞,普通名詞,副詞可能,*":{POS: NOUN},
|
||||||
|
|
||||||
"連体詞,*,*,*":{POS: ADJ}, # XXX note この、その etc. should be DET
|
"連体詞,*,*,*":{POS: ADJ}, # XXX this has exceptions based on literal token
|
||||||
"連体詞,*,*,*,ADJ":{POS: ADJ},
|
"連体詞,*,*,*,ADJ":{POS: ADJ},
|
||||||
|
"連体詞,*,*,*,PRON":{POS: PRON},
|
||||||
"連体詞,*,*,*,DET":{POS: DET},
|
"連体詞,*,*,*,DET":{POS: DET},
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user