Refer to _parser_internals.nonproj.DELIMITER instead of the hard-coded "||" literal

This commit is contained in:
svlandeg 2021-01-07 18:58:13 +01:00
parent 411c842a71
commit 1abeca90a6
2 changed files with 5 additions and 3 deletions

View File

@@ -12,6 +12,7 @@ from ..training import Example
from ..training.initialize import get_sourced_components from ..training.initialize import get_sourced_components
from ..schemas import ConfigSchemaTraining from ..schemas import ConfigSchemaTraining
from ..pipeline._parser_internals import nonproj from ..pipeline._parser_internals import nonproj
from ..pipeline._parser_internals.nonproj import DELIMITER
from ..language import Language from ..language import Language
from ..util import registry, resolve_dot_names from ..util import registry, resolve_dot_names
from .. import util from .. import util
@@ -383,7 +384,7 @@ def debug_data(
# rare labels in projectivized train # rare labels in projectivized train
rare_projectivized_labels = [] rare_projectivized_labels = []
for label in gold_train_data["deps"]: for label in gold_train_data["deps"]:
if gold_train_data["deps"][label] <= DEP_LABEL_THRESHOLD and "||" in label: if gold_train_data["deps"][label] <= DEP_LABEL_THRESHOLD and DELIMITER in label:
rare_projectivized_labels.append( rare_projectivized_labels.append(
f"{label}: {gold_train_data['deps'][label]}" f"{label}: {gold_train_data['deps'][label]}"
) )

View File

@@ -9,6 +9,7 @@ from ._parser_internals.arc_eager cimport ArcEager
from .functions import merge_subtokens from .functions import merge_subtokens
from ..language import Language from ..language import Language
from ._parser_internals import nonproj from ._parser_internals import nonproj
from ._parser_internals.nonproj import DELIMITER
from ..scorer import Scorer from ..scorer import Scorer
from ..training import validate_examples from ..training import validate_examples
@@ -230,8 +231,8 @@ cdef class DependencyParser(Parser):
for move in self.move_names: for move in self.move_names:
if "-" in move: if "-" in move:
label = move.split("-")[1] label = move.split("-")[1]
if "||" in label: if DELIMITER in label:
label = label.split("||")[1] label = label.split(DELIMITER)[1]
labels.add(label) labels.add(label)
return tuple(sorted(labels)) return tuple(sorted(labels))