mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-04 21:50:35 +03:00
* Fix Issue #54: Error merging multi-word token when there's a mid-token match.
This commit is contained in:
parent
42617548af
commit
2ef170a991
|
@ -281,6 +281,8 @@ cdef class Tokens:
|
||||||
if self.data[i].idx == start_idx:
|
if self.data[i].idx == start_idx:
|
||||||
start = i
|
start = i
|
||||||
if (self.data[i].idx + self.data[i].lex.length) == end_idx:
|
if (self.data[i].idx + self.data[i].lex.length) == end_idx:
|
||||||
|
if start == -1:
|
||||||
|
return None
|
||||||
end = i + 1
|
end = i + 1
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -30,3 +30,9 @@ def test_merge_heads():
|
||||||
assert tokens[3].head.i == 1
|
assert tokens[3].head.i == 1
|
||||||
assert tokens[4].head.i in [1, 3]
|
assert tokens[4].head.i in [1, 3]
|
||||||
assert tokens[5].head.i == 4
|
assert tokens[5].head.i == 4
|
||||||
|
|
||||||
|
|
||||||
|
def test_issue_54():
|
||||||
|
text = u'Talks given by women had a slightly higher number of questions asked (3.2$\pm$0.2) than talks given by men (2.6$\pm$0.1).'
|
||||||
|
tokens = NLU(text, merge_mwes=True)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user