diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index d23cd3717..8eabf1f8f 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -12,6 +12,7 @@ from ..training import Example from ..training.initialize import get_sourced_components from ..schemas import ConfigSchemaTraining from ..pipeline._parser_internals import nonproj +from ..pipeline._parser_internals.nonproj import DELIMITER from ..language import Language from ..util import registry, resolve_dot_names from .. import util @@ -383,7 +384,7 @@ def debug_data( # rare labels in projectivized train rare_projectivized_labels = [] for label in gold_train_data["deps"]: - if gold_train_data["deps"][label] <= DEP_LABEL_THRESHOLD and "||" in label: + if gold_train_data["deps"][label] <= DEP_LABEL_THRESHOLD and DELIMITER in label: rare_projectivized_labels.append( f"{label}: {gold_train_data['deps'][label]}" ) diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx index 1fe29eb9b..18c9fd25a 100644 --- a/spacy/pipeline/dep_parser.pyx +++ b/spacy/pipeline/dep_parser.pyx @@ -9,6 +9,7 @@ from ._parser_internals.arc_eager cimport ArcEager from .functions import merge_subtokens from ..language import Language from ._parser_internals import nonproj +from ._parser_internals.nonproj import DELIMITER from ..scorer import Scorer from ..training import validate_examples @@ -230,8 +231,8 @@ cdef class DependencyParser(Parser): for move in self.move_names: if "-" in move: label = move.split("-")[1] - if "||" in label: - label = label.split("||")[1] + if DELIMITER in label: + label = label.split(DELIMITER)[1] labels.add(label) return tuple(sorted(labels))