This commit is contained in:
svlandeg 2020-06-19 11:31:01 +02:00 committed by Matthew Honnibal
parent 5477bf054f
commit a427ca9355
4 changed files with 7 additions and 19 deletions

View File

@ -646,20 +646,6 @@ class Language(object):
sgd(W, dW, key=key) sgd(W, dW, key=key)
return losses return losses
def preprocess_gold(self, examples):
    """Pass gold data through every pipeline component that can pre-process it.

    Each component in `self.pipeline` that exposes a `preprocess_gold`
    attribute is called with the examples, and whatever it returns is handed
    to the next such component. This is a generator, so no component runs
    until the first item is requested.

    examples (iterable): `Example` objects.
    YIELDS: The items of the iterable returned by the last component that
        processed the data, or of `examples` itself if none did.
    """
    # TODO: This is deprecated right?
    # The pipeline holds (name, component) pairs; only the component matters.
    capable = (
        component
        for _, component in self.pipeline
        if hasattr(component, "preprocess_gold")
    )
    for component in capable:
        # Chain the hooks: each one consumes the previous one's output.
        examples = component.preprocess_gold(examples)
    yield from examples
def begin_training(self, get_examples=None, sgd=None, component_cfg=None, **cfg): def begin_training(self, get_examples=None, sgd=None, component_cfg=None, **cfg):
"""Allocate models, pre-process training data and acquire a trainer and """Allocate models, pre-process training data and acquire a trainer and
optimizer. Used as a contextmanager. optimizer. Used as a contextmanager.

View File

@ -459,9 +459,9 @@ cdef class ArcEager(TransitionSystem):
actions[RIGHT][label] = 1 actions[RIGHT][label] = 1
actions[REDUCE][label] = 1 actions[REDUCE][label] = 1
for example in kwargs.get('gold_parses', []): for example in kwargs.get('gold_parses', []):
heads, labels = nonproj.projectivize(example.token_annotation.heads, heads, labels = nonproj.projectivize(example.get_aligned("HEAD"),
example.token_annotation.deps) example.get_aligned("DEP"))
for child, head, label in zip(example.token_annotation.ids, heads, labels): for child, head, label in zip(example.get_aligned("ID"), heads, labels):
if label.upper() == 'ROOT' : if label.upper() == 'ROOT' :
label = 'ROOT' label = 'ROOT'
if head == child: if head == child:

View File

@ -78,8 +78,8 @@ def is_decorated(label):
def count_decorated_labels(gold_data): def count_decorated_labels(gold_data):
freqs = {} freqs = {}
for example in gold_data: for example in gold_data:
proj_heads, deco_deps = projectivize(example.token_annotation.heads, proj_heads, deco_deps = projectivize(example.get_aligned("HEAD"),
example.token_annotation.deps) example.get_aligned("DEP"))
# set the label to ROOT for each root dependent # set the label to ROOT for each root dependent
deco_deps = ['ROOT' if head == i else deco_deps[i] deco_deps = ['ROOT' if head == i else deco_deps[i]
for i, head in enumerate(proj_heads)] for i, head in enumerate(proj_heads)]

View File

@ -497,6 +497,8 @@ def test_split_sents(merged_dict):
Doc(nlp.vocab, words=merged_dict["words"], spaces=merged_dict["spaces"]), Doc(nlp.vocab, words=merged_dict["words"], spaces=merged_dict["spaces"]),
merged_dict merged_dict
) )
assert example.text == "Hi there everyone It is just me"
assert len(get_parses_from_example( assert len(get_parses_from_example(
example, example,
merge=False, merge=False,