From 0fb8d437c09234ce913cb84982acfbdaf7b8c61d Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 14 Feb 2021 13:38:13 +1100 Subject: [PATCH] Fix sentence fragments bug (#7056, #7035) (#7057) * Add test for #7035 * Update test for issue 7056 * Fix test * Fix transitions method used in testing * Fix state eol detection when rebuffer * Clean up redundant fix --- spacy/pipeline/_parser_internals/_state.pxd | 2 +- .../_parser_internals/transition_system.pyx | 2 -- spacy/tests/regression/test_issue7056.py | 27 +++++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 spacy/tests/regression/test_issue7056.py diff --git a/spacy/pipeline/_parser_internals/_state.pxd b/spacy/pipeline/_parser_internals/_state.pxd index a6bf926f9..161f3ca48 100644 --- a/spacy/pipeline/_parser_internals/_state.pxd +++ b/spacy/pipeline/_parser_internals/_state.pxd @@ -278,7 +278,7 @@ cdef cppclass StateC: return this._stack.size() int buffer_length() nogil const: - return this.length - this._b_i + return (this.length - this._b_i) + this._rebuffer.size() void push() nogil: b0 = this.B(0) diff --git a/spacy/pipeline/_parser_internals/transition_system.pyx b/spacy/pipeline/_parser_internals/transition_system.pyx index 9bb4f7f5f..9e6f847eb 100644 --- a/spacy/pipeline/_parser_internals/transition_system.pyx +++ b/spacy/pipeline/_parser_internals/transition_system.pyx @@ -134,8 +134,6 @@ cdef class TransitionSystem: def is_valid(self, StateClass stcls, move_name): action = self.lookup_transition(move_name) - if action.move == 0: - return False return action.is_valid(stcls.c, action.label) cdef int set_valid(self, int* is_valid, const StateC* st) nogil: diff --git a/spacy/tests/regression/test_issue7056.py b/spacy/tests/regression/test_issue7056.py new file mode 100644 index 000000000..64a420b84 --- /dev/null +++ b/spacy/tests/regression/test_issue7056.py @@ -0,0 +1,27 @@ +import pytest + +from spacy.tokens.doc import Doc +from spacy.vocab import Vocab +from spacy.pipeline._parser_internals.arc_eager import ArcEager + + +def test_issue7056(): + """Test that the Unshift transition works properly, and doesn't cause + sentence segmentation errors.""" + vocab = Vocab() + ae = ArcEager( + vocab.strings, + ArcEager.get_actions(left_labels=["amod"], right_labels=["pobj"]) + ) + doc = Doc(vocab, words="Severe pain , after trauma".split()) + state = ae.init_batch([doc])[0] + ae.apply_transition(state, "S") + ae.apply_transition(state, "L-amod") + ae.apply_transition(state, "S") + ae.apply_transition(state, "S") + ae.apply_transition(state, "S") + ae.apply_transition(state, "R-pobj") + ae.apply_transition(state, "D") + ae.apply_transition(state, "D") + ae.apply_transition(state, "D") + assert not state.eol()