mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 13:47:13 +03:00
* Add hacky distribution over supersenses, using a half-assed thing like a stick-breaking process
This commit is contained in:
parent
153758bf65
commit
00c9acbf42
|
@ -82,12 +82,32 @@ def get_og_to_ssenses(wordnet_dir, onto_dir):
|
|||
return mapping
|
||||
|
||||
|
||||
def make_supersense_dict(wordnet_dir):
    """Build a rough supersense distribution for every (word, pos) pair.

    The sense -> supersense table is read from ``index.sense`` under
    ``wordnet_dir`` via ``get_sense_to_ssense`` and its entries are grouped
    by ``(word, pos)``.  Within each group the ``(sense, supersense)`` pairs
    are visited in sorted order and each one takes half of the probability
    mass that is still unassigned (1/2, 1/4, 1/8, ...) -- a crude stand-in
    for a stick-breaking process.  The mass left over after the last sense
    is then split equally between the senses, so every distribution sums
    to 1 (up to float rounding).  Senses sharing a supersense pool their
    mass.

    Args:
        wordnet_dir: Directory containing the ``index.sense`` file.

    Returns:
        A dict mapping ``(word, pos)`` to ``{supersense: probability}``.
    """
    sense_to_ssense = get_sense_to_ssense(path.join(wordnet_dir, 'index.sense'))

    # Collect every (sense, supersense) pair under its (word, pos) key.
    gather = {}
    for (word, pos, sense), supersense in sense_to_ssense.items():
        gather.setdefault((word, pos), []).append((sense, supersense))

    mapping = {}
    for key, senses in gather.items():
        # Sort once; both passes below must walk the senses in the same order.
        ordered = sorted(senses)
        probs = {}
        remaining = 1.0
        # Each sense in turn claims half of whatever mass is left.
        for _, supersense in ordered:
            remaining /= 2
            probs[supersense] = probs.get(supersense, 0.0) + remaining
        # Spread the unclaimed tail evenly so the total comes back to 1.
        share = remaining / len(ordered)
        for _, supersense in ordered:
            probs[supersense] += share
        mapping[key] = probs
    return mapping
|
||||
|
||||
|
||||
def main(wordnet_dir, onto_dir):
    """Print a few sample entries from both mappings as a quick sanity check.

    First looks up four ``(word, pos, sense)`` keys in the mapping returned
    by ``get_og_to_ssenses``, then four ``(word, pos)`` keys in the mapping
    returned by ``make_supersense_dict``.  A missing key raises ``KeyError``.
    """
    sample_words = ('dog', 'abandon')
    sample_tags = ('v', 'n')

    og_to_ssenses = get_og_to_ssenses(wordnet_dir, onto_dir)
    for word in sample_words:
        for tag in sample_tags:
            # Single-argument parenthesised form prints the same as ``print x``.
            print(og_to_ssenses[(word, tag, 1)])

    supersense_dists = make_supersense_dict(wordnet_dir)
    for word in sample_words:
        for tag in sample_tags:
            print(supersense_dists[(word, tag)])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Reference in New Issue
Block a user