Remove TokenAnnotation code from nonproj

This commit is contained in:
Matthew Honnibal 2020-06-15 18:14:47 +02:00
parent c95494739c
commit c66f93299e

View File

@ -7,7 +7,7 @@ from copy import copy
from ..tokens.doc cimport Doc, set_children_from_heads
from ..gold import Example
from ..errors import Errors
@ -90,31 +90,6 @@ def count_decorated_labels(gold_data):
return freqs
def preprocess_training_data(gold_data, label_freq_cutoff=30):
    """Projectivize the dependency annotations in *gold_data*.

    Returns a list of new Example objects whose head arrays are projective
    and whose dependency labels are decorated wherever an arc was lifted.
    When *label_freq_cutoff* is positive, decorated labels seen fewer than
    that many times across the data are filtered back to their plain form.
    """
    counts = {}
    output = []
    for example in gold_data:
        heads, labels = projectivize(example.token_annotation.heads,
                                     example.token_annotation.deps)
        # A token whose head is itself is a root: force the ROOT label.
        labels = [
            'ROOT' if head == i else labels[i]
            for i, head in enumerate(heads)
        ]
        if label_freq_cutoff > 0:
            # Tally decorated labels so rare ones can be pruned below.
            for label in labels:
                if is_decorated(label):
                    counts[label] = counts.get(label, 0) + 1
        annot = example.token_annotation.to_dict()
        annot["heads"] = heads
        annot["deps"] = labels
        new_example = Example(doc=example.doc)
        new_example.token_annotation = TokenAnnotation(**annot)
        output.append(new_example)
    if label_freq_cutoff > 0:
        return _filter_labels(output, label_freq_cutoff, counts)
    return output
def projectivize(heads, labels):
    # Use the algorithm by Nivre &amp; Nilsson 2005. Assumes heads to be a proper
    # tree, i.e. connected and cycle-free. Returns a new pair (heads, labels)
@ -200,22 +175,3 @@ def _find_new_head(token, headlabel):
next_queue.append(child)
queue = next_queue
return token.head
def _filter_labels(examples, cutoff, freqs):
    # Strip decorated labels that occur fewer than `cutoff` times back to
    # their plain form: they can't be learned reliably anyway and keeping
    # them out makes the label set smaller.
    result = []
    for example in examples:
        new_deps = [
            decompose(dep)[0]
            if is_decorated(dep) and freqs.get(dep, 0) < cutoff
            else dep
            for dep in example.token_annotation.deps
        ]
        annot = example.token_annotation.to_dict()
        annot["deps"] = new_deps
        filtered_example = Example(doc=example.doc)
        filtered_example.token_annotation = TokenAnnotation(**annot)
        result.append(filtered_example)
    return result