mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-13 09:33:18 +03:00
* Make very hacky modifications to parser training script, to get Chinese up and running.
This commit is contained in:
parent
b1cf2c16c3
commit
588026fe93
|
@@ -41,17 +41,21 @@ def _corrupt(c, noise_level):
|
||||||
|
|
||||||
|
|
||||||
def add_noise(orig, noise_level):
|
def add_noise(orig, noise_level):
|
||||||
if random.random() >= noise_level:
|
# TODO
|
||||||
return orig
|
return orig.replace(' ', '')
|
||||||
elif type(orig) == list:
|
#if random.random() >= noise_level:
|
||||||
corrupted = [_corrupt(word, noise_level) for word in orig]
|
# return orig
|
||||||
corrupted = [w for w in corrupted if w]
|
#elif type(orig) == list:
|
||||||
return corrupted
|
# corrupted = [_corrupt(word, noise_level) for word in orig]
|
||||||
else:
|
# corrupted = [w for w in corrupted if w]
|
||||||
return ''.join(_corrupt(c, noise_level) for c in orig)
|
# return corrupted
|
||||||
|
#else:
|
||||||
|
# return ''.join(_corrupt(c, noise_level) for c in orig)
|
||||||
|
|
||||||
|
|
||||||
def score_model(scorer, nlp, raw_text, annot_tuples, verbose=False):
|
def score_model(scorer, nlp, raw_text, annot_tuples, verbose=False):
|
||||||
|
# TODO
|
||||||
|
raw_text = raw_text.replace(' ', '')
|
||||||
if raw_text is None:
|
if raw_text is None:
|
||||||
tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
|
tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
|
||||||
else:
|
else:
|
||||||
|
@@ -139,7 +143,10 @@ def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic',
|
||||||
nlp.tagger(tokens)
|
nlp.tagger(tokens)
|
||||||
gold = GoldParse(tokens, annot_tuples)
|
gold = GoldParse(tokens, annot_tuples)
|
||||||
if not gold.is_projective:
|
if not gold.is_projective:
|
||||||
raise Exception("Non-projective sentence in training: %s" % annot_tuples[1])
|
# TODO
|
||||||
|
nlp.tagger.train(tokens, gold.tags)
|
||||||
|
continue
|
||||||
|
#raise Exception("Non-projective sentence in training: %s" % annot_tuples[1])
|
||||||
loss += nlp.parser.train(tokens, gold)
|
loss += nlp.parser.train(tokens, gold)
|
||||||
nlp.entity.train(tokens, gold)
|
nlp.entity.train(tokens, gold)
|
||||||
nlp.tagger.train(tokens, gold.tags)
|
nlp.tagger.train(tokens, gold.tags)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user