* Skip sentences of length 1 in training

2025-08-09 06:34:54 +03:00 · 2015-06-05 02:29:03 +02:00 · 2015-06-05 02:29:03 +02:00 · e772b48dcd
commit e772b48dcd
parent 6bf35cecc3
1 changed file with 2 additions and 0 deletions
--- a/bin/parser/train.py
+++ b/bin/parser/train.py
@@ -178,6 +178,8 @@ def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic',
            else:
                sents = _merge_sents(sents)
            for annot_tuples, ctnt in sents:
+                if len(annot_tuples[1]) == 1:
+                    continue
                score_model(scorer, nlp, raw_text, annot_tuples, train_tags)
                if raw_text is None:
                    tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])