From c2475a25de117819a3509c6a51cbb1dfe32be46e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= Date: Wed, 26 Jan 2022 12:01:37 +0100 Subject: [PATCH] Improve unseen label masking Two changes to speed up masking by ~10%: - Use a bool array rather than an array of float32. - Let the mask indicate whether a label was seen, rather than unseen. The mask is most frequently used to index scores for seen labels. However, since the mask marked unseen labels, this required computing an intermediate flipped mask. --- spacy/ml/tb_framework.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/spacy/ml/tb_framework.py b/spacy/ml/tb_framework.py index 9aac5b801..dd2ff6c19 100644 --- a/spacy/ml/tb_framework.py +++ b/spacy/ml/tb_framework.py @@ -150,7 +150,7 @@ def forward(model, docs_moves: Tuple[List[Doc], TransitionSystem], is_train: boo all_statevecs = [] all_scores = [] next_states = [s for s in states if not s.is_final()] - unseen_mask = _get_unseen_mask(model) + seen_mask = _get_seen_mask(model) ids = numpy.zeros((len(states), nF), dtype="i") arange = model.ops.xp.arange(nF) while next_states: @@ -168,9 +168,10 @@ def forward(model, docs_moves: Tuple[List[Doc], TransitionSystem], is_train: boo # to get the logits. scores = model.ops.gemm(statevecs, upper_W, trans2=True) scores += upper_b - scores[:, unseen_mask == 0] = model.ops.xp.nanmin(scores) + scores[:, seen_mask == False] = model.ops.xp.nanmin(scores) # Transition the states, filtering out any that are finished. - next_states = moves.transition_states(next_states, scores) + cpu_scores = model.ops.to_numpy(scores) + next_states = moves.transition_states(next_states, cpu_scores) all_scores.append(scores) if is_train: # Remember intermediate results for the backprop. 
@@ -191,7 +192,7 @@ def forward(model, docs_moves: Tuple[List[Doc], TransitionSystem], is_train: boo for clas in set(model.attrs["unseen_classes"]): if (d_scores[:, clas] < 0).any(): model.attrs["unseen_classes"].remove(clas) - d_scores *= unseen_mask + d_scores *= seen_mask # Calculate the gradients for the parameters of the upper layer. # The weight gemm is (nS, nO) @ (nS, nH).T model.inc_grad("upper_b", d_scores.sum(axis=0)) @@ -240,7 +241,7 @@ def _forward_reference( all_scores = [] all_tokfeats = [] next_states = [s for s in states if not s.is_final()] - unseen_mask = _get_unseen_mask(model) + seen_mask = _get_seen_mask(model) ids = numpy.zeros((len(states), nF), dtype="i") while next_states: ids = ids[: len(next_states)] @@ -258,7 +259,7 @@ def _forward_reference( # to get the logits. scores = model.ops.gemm(statevecs, upper_W, trans2=True) scores += upper_b - scores[:, unseen_mask == 0] = model.ops.xp.nanmin(scores) + scores[:, seen_mask == False] = model.ops.xp.nanmin(scores) # Transition the states, filtering out any that are finished. next_states = moves.transition_states(next_states, scores) all_scores.append(scores) @@ -285,7 +286,7 @@ def _forward_reference( for clas in set(model.attrs["unseen_classes"]): if (d_scores[:, clas] < 0).any(): model.attrs["unseen_classes"].remove(clas) - d_scores *= unseen_mask + d_scores *= seen_mask assert statevecs.shape == (nS, nH), statevecs.shape assert d_scores.shape == (nS, nO), d_scores.shape # Calculate the gradients for the parameters of the upper layer. @@ -314,11 +315,10 @@ def _forward_reference( return (states, all_scores), backprop_parser -def _get_unseen_mask(model: Model) -> Floats1d: - mask = model.ops.alloc1f(model.get_dim("nO")) - mask.fill(1) +def _get_seen_mask(model: Model) -> Floats1d: + mask = model.ops.xp.ones(model.get_dim("nO"), dtype="bool") for class_ in model.attrs.get("unseen_classes", set()): - mask[class_] = False return mask