Fix sentence fragments bug (#7056, #7035) (#7057)

* Add test for #7035

* Update test for issue 7056

* Fix test

* Fix transitions method used in testing

* Fix state eol detection when rebuffer

* Clean up redundant fix
This commit is contained in:
Matthew Honnibal 2021-02-14 13:38:13 +11:00 committed by GitHub
parent 660642902a
commit 0fb8d437c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 3 deletions

View File

@ -278,7 +278,7 @@ cdef cppclass StateC:
return this._stack.size()
int buffer_length() nogil const:
return this.length - this._b_i
return (this.length - this._b_i) + this._rebuffer.size()
void push() nogil:
b0 = this.B(0)

View File

@ -134,8 +134,6 @@ cdef class TransitionSystem:
def is_valid(self, StateClass stcls, move_name):
action = self.lookup_transition(move_name)
if action.move == 0:
return False
return action.is_valid(stcls.c, action.label)
cdef int set_valid(self, int* is_valid, const StateC* st) nogil:

View File

@ -0,0 +1,27 @@
import pytest
from spacy.tokens.doc import Doc
from spacy.vocab import Vocab
from spacy.pipeline._parser_internals.arc_eager import ArcEager
def test_issue7056():
    """Regression test for issue #7056.

    Checks that the Unshift transition works properly and does not cause
    sentence segmentation errors: after the scripted transition sequence
    below has been applied, the parser state must not report ``eol()``.
    """
    vocab = Vocab()
    strings = vocab.strings
    actions = ArcEager.get_actions(left_labels=["amod"], right_labels=["pobj"])
    ae = ArcEager(strings, actions)
    words = "Severe pain , after trauma".split()
    doc = Doc(vocab, words=words)
    batch_states = ae.init_batch([doc])
    state = batch_states[0]
    # The transitions are applied strictly in this order; each call mutates
    # the same state object.
    move_sequence = ("S", "L-amod", "S", "S", "S", "R-pobj", "D", "D", "D")
    for move_name in move_sequence:
        ae.apply_transition(state, move_name)
    assert not state.eol()