diff --git a/spacy/gold.pyx b/spacy/gold.pyx index 2512c179f..5729af667 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -213,7 +213,7 @@ class GoldCorpus(object): train_tuples = self.train_tuples if projectivize: train_tuples = nonproj.preprocess_training_data( - self.train_tuples) + self.train_tuples, label_freq_cutoff=100) random.shuffle(train_tuples) gold_docs = self.iter_gold_docs(nlp, train_tuples, gold_preproc, max_length=max_length, diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index a8a1d4334..9288b523f 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -809,7 +809,7 @@ cdef class Parser: def begin_training(self, gold_tuples, pipeline=None, **cfg): if 'model' in cfg: self.model = cfg['model'] - gold_tuples = nonproj.preprocess_training_data(gold_tuples) + gold_tuples = nonproj.preprocess_training_data(gold_tuples, label_freq_cutoff=100) actions = self.moves.get_actions(gold_parses=gold_tuples) for action, labels in actions.items(): for label in labels: