mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 01:34:30 +03:00
Fix spancat for zero suggestions (#11860)
* Add test for spancat predict with zero suggestions * Fix spancat for zero suggestions * Undo changes to extract_spans * Use .sum() as in update
This commit is contained in:
parent
9cf3fa9711
commit
445c670a2d
|
@ -272,6 +272,9 @@ class SpanCategorizer(TrainablePipe):
|
||||||
DOCS: https://spacy.io/api/spancategorizer#predict
|
DOCS: https://spacy.io/api/spancategorizer#predict
|
||||||
"""
|
"""
|
||||||
indices = self.suggester(docs, ops=self.model.ops)
|
indices = self.suggester(docs, ops=self.model.ops)
|
||||||
|
if indices.lengths.sum() == 0:
|
||||||
|
scores = self.model.ops.alloc2f(0, 0)
|
||||||
|
else:
|
||||||
scores = self.model.predict((docs, indices)) # type: ignore
|
scores = self.model.predict((docs, indices)) # type: ignore
|
||||||
return indices, scores
|
return indices, scores
|
||||||
|
|
||||||
|
|
|
@ -372,24 +372,39 @@ def test_overfitting_IO_overlapping():
|
||||||
|
|
||||||
|
|
||||||
def test_zero_suggestions():
|
def test_zero_suggestions():
|
||||||
# Test with a suggester that returns 0 suggestions
|
# Test with a suggester that can return 0 suggestions
|
||||||
|
|
||||||
@registry.misc("test_zero_suggester")
|
@registry.misc("test_mixed_zero_suggester")
|
||||||
def make_zero_suggester():
|
def make_mixed_zero_suggester():
|
||||||
def zero_suggester(docs, *, ops=None):
|
def mixed_zero_suggester(docs, *, ops=None):
|
||||||
if ops is None:
|
if ops is None:
|
||||||
ops = get_current_ops()
|
ops = get_current_ops()
|
||||||
return Ragged(
|
spans = []
|
||||||
ops.xp.zeros((0, 0), dtype="i"), ops.xp.zeros((len(docs),), dtype="i")
|
lengths = []
|
||||||
)
|
for doc in docs:
|
||||||
|
if len(doc) > 0 and len(doc) % 2 == 0:
|
||||||
|
spans.append((0, 1))
|
||||||
|
lengths.append(1)
|
||||||
|
else:
|
||||||
|
lengths.append(0)
|
||||||
|
spans = ops.asarray2i(spans)
|
||||||
|
lengths_array = ops.asarray1i(lengths)
|
||||||
|
if len(spans) > 0:
|
||||||
|
output = Ragged(ops.xp.vstack(spans), lengths_array)
|
||||||
|
else:
|
||||||
|
output = Ragged(ops.xp.zeros((0, 0), dtype="i"), lengths_array)
|
||||||
|
return output
|
||||||
|
|
||||||
return zero_suggester
|
return mixed_zero_suggester
|
||||||
|
|
||||||
fix_random_seed(0)
|
fix_random_seed(0)
|
||||||
nlp = English()
|
nlp = English()
|
||||||
spancat = nlp.add_pipe(
|
spancat = nlp.add_pipe(
|
||||||
"spancat",
|
"spancat",
|
||||||
config={"suggester": {"@misc": "test_zero_suggester"}, "spans_key": SPAN_KEY},
|
config={
|
||||||
|
"suggester": {"@misc": "test_mixed_zero_suggester"},
|
||||||
|
"spans_key": SPAN_KEY,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
train_examples = make_examples(nlp)
|
train_examples = make_examples(nlp)
|
||||||
optimizer = nlp.initialize(get_examples=lambda: train_examples)
|
optimizer = nlp.initialize(get_examples=lambda: train_examples)
|
||||||
|
@ -397,6 +412,16 @@ def test_zero_suggestions():
|
||||||
assert set(spancat.labels) == {"LOC", "PERSON"}
|
assert set(spancat.labels) == {"LOC", "PERSON"}
|
||||||
|
|
||||||
nlp.update(train_examples, sgd=optimizer)
|
nlp.update(train_examples, sgd=optimizer)
|
||||||
|
# empty doc
|
||||||
|
nlp("")
|
||||||
|
# single doc with zero suggestions
|
||||||
|
nlp("one")
|
||||||
|
# single doc with one suggestion
|
||||||
|
nlp("two two")
|
||||||
|
# batch with mixed zero/one suggestions
|
||||||
|
list(nlp.pipe(["one", "two two", "three three three", "", "four four four four"]))
|
||||||
|
# batch with no suggestions
|
||||||
|
list(nlp.pipe(["", "one", "three three three"]))
|
||||||
|
|
||||||
|
|
||||||
def test_set_candidates():
|
def test_set_candidates():
|
||||||
|
|
Loading…
Reference in New Issue
Block a user