Mirror of https://github.com/explosion/spaCy.git (synced 2025-07-03 03:13:08 +03:00)
* Allow an OOV probability to be specified in get_lex_props
This commit is contained in:
Parent: 5c04dcd7c1
Commit: cd6e25132b
|
@ -25,7 +25,7 @@ from ..util import read_lang_data
|
||||||
from ..attrs import TAG, HEAD, DEP, ENT_TYPE, ENT_IOB
|
from ..attrs import TAG, HEAD, DEP, ENT_TYPE, ENT_IOB
|
||||||
|
|
||||||
|
|
||||||
def get_lex_props(string):
|
def get_lex_props(string, oov_prob=-30):
|
||||||
return {
|
return {
|
||||||
'flags': get_flags(string),
|
'flags': get_flags(string),
|
||||||
'length': len(string),
|
'length': len(string),
|
||||||
|
@ -36,7 +36,7 @@ def get_lex_props(string):
|
||||||
'prefix': string[0],
|
'prefix': string[0],
|
||||||
'suffix': string[-3:],
|
'suffix': string[-3:],
|
||||||
'cluster': 0,
|
'cluster': 0,
|
||||||
'prob': -22,
|
'prob': oov_prob,
|
||||||
'sentiment': 0
|
'sentiment': 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,7 +83,8 @@ class English(object):
|
||||||
|
|
||||||
self.vocab = Vocab(data_dir=path.join(data_dir, 'vocab') if data_dir else None,
|
self.vocab = Vocab(data_dir=path.join(data_dir, 'vocab') if data_dir else None,
|
||||||
get_lex_props=get_lex_props, load_vectors=load_vectors,
|
get_lex_props=get_lex_props, load_vectors=load_vectors,
|
||||||
pos_tags=POS_TAGS)
|
pos_tags=POS_TAGS,
|
||||||
|
float(open(path.join(data_dir, 'vocab', 'oov_prob')).read()))
|
||||||
if Tagger is True:
|
if Tagger is True:
|
||||||
Tagger = EnPosTagger
|
Tagger = EnPosTagger
|
||||||
if Parser is True:
|
if Parser is True:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user