mirror of
https://github.com/explosion/spaCy.git
synced 2025-10-24 12:41:23 +03:00
Fix re-parsing of previously parsed text
If a Doc object had previously been parsed, re-parsing it could add invalid parses. There were two problems: 1) the existing parse was only partially erased, and 2) the RightArc action was able to create a 1-cycle. This patch fixes both errors and avoids resetting the parse if one is already present. In theory, this might allow a better parse to be predicted by running the parser twice. Closes #1253.
This commit is contained in:
parent
61bc203f3f
commit
f111b228e0
|
@ -212,7 +212,8 @@ cdef class LeftArc:
|
||||||
cdef class RightArc:
|
cdef class RightArc:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
cdef bint is_valid(const StateC* st, attr_t label) nogil:
|
cdef bint is_valid(const StateC* st, attr_t label) nogil:
|
||||||
return st.B_(0).sent_start != 1
|
# If there's a (perhaps partial) parse pre-set, don't allow a cycle.
|
||||||
|
return st.B_(0).sent_start != 1 and st.H(st.S(0)) != st.B(0)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
cdef int transition(StateC* st, attr_t label) nogil:
|
cdef int transition(StateC* st, attr_t label) nogil:
|
||||||
|
@ -446,14 +447,19 @@ cdef class ArcEager(TransitionSystem):
|
||||||
|
|
||||||
cdef int initialize_state(self, StateC* st) nogil:
|
cdef int initialize_state(self, StateC* st) nogil:
|
||||||
for i in range(st.length):
|
for i in range(st.length):
|
||||||
|
if st._sent[i].dep == 0:
|
||||||
st._sent[i].l_edge = i
|
st._sent[i].l_edge = i
|
||||||
st._sent[i].r_edge = i
|
st._sent[i].r_edge = i
|
||||||
|
st._sent[i].head = 0
|
||||||
|
st._sent[i].dep = 0
|
||||||
|
st._sent[i].l_kids = 0
|
||||||
|
st._sent[i].r_kids = 0
|
||||||
st.fast_forward()
|
st.fast_forward()
|
||||||
|
|
||||||
cdef int finalize_state(self, StateC* st) nogil:
|
cdef int finalize_state(self, StateC* st) nogil:
|
||||||
cdef int i
|
cdef int i
|
||||||
for i in range(st.length):
|
for i in range(st.length):
|
||||||
if st._sent[i].head == 0 and st._sent[i].dep == 0:
|
if st._sent[i].head == 0:
|
||||||
st._sent[i].dep = self.root_label
|
st._sent[i].dep = self.root_label
|
||||||
|
|
||||||
def finalize_doc(self, doc):
|
def finalize_doc(self, doc):
|
||||||
|
|
20
spacy/tests/regression/test_issue1253.py
Normal file
20
spacy/tests/regression/test_issue1253.py
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
import pytest
|
||||||
|
import spacy
|
||||||
|
|
||||||
|
|
||||||
|
def ss(tt):
|
||||||
|
for i in range(len(tt)-1):
|
||||||
|
for j in range(i+1, len(tt)):
|
||||||
|
tt[i:j].root
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.models('en')
|
||||||
|
def test_access_parse_for_merged():
|
||||||
|
nlp = spacy.load('en_core_web_sm')
|
||||||
|
t_t = nlp.tokenizer("Highly rated - I'll definitely")
|
||||||
|
nlp.tagger(t_t)
|
||||||
|
nlp.parser(t_t)
|
||||||
|
nlp.parser(t_t)
|
||||||
|
ss(t_t)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user