Prune number of non-projective labels more aggressively

This commit is contained in:
Matthew Honnibal 2017-10-11 02:46:44 -05:00
parent 73bca3d382
commit 6e552c9d83
2 changed files with 2 additions and 2 deletions

View File

@ -213,7 +213,7 @@ class GoldCorpus(object):
train_tuples = self.train_tuples train_tuples = self.train_tuples
if projectivize: if projectivize:
train_tuples = nonproj.preprocess_training_data( train_tuples = nonproj.preprocess_training_data(
self.train_tuples) self.train_tuples, label_freq_cutoff=100)
random.shuffle(train_tuples) random.shuffle(train_tuples)
gold_docs = self.iter_gold_docs(nlp, train_tuples, gold_preproc, gold_docs = self.iter_gold_docs(nlp, train_tuples, gold_preproc,
max_length=max_length, max_length=max_length,

View File

@ -809,7 +809,7 @@ cdef class Parser:
def begin_training(self, gold_tuples, pipeline=None, **cfg): def begin_training(self, gold_tuples, pipeline=None, **cfg):
if 'model' in cfg: if 'model' in cfg:
self.model = cfg['model'] self.model = cfg['model']
gold_tuples = nonproj.preprocess_training_data(gold_tuples) gold_tuples = nonproj.preprocess_training_data(gold_tuples, label_freq_cutoff=100)
actions = self.moves.get_actions(gold_parses=gold_tuples) actions = self.moves.get_actions(gold_parses=gold_tuples)
for action, labels in actions.items(): for action, labels in actions.items():
for label in labels: for label in labels: