mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-19 20:52:23 +03:00
Make get_sentence_map work with init
When sentences are not available, just treat the whole doc as one sentence. This is a reasonable general fallback, and it is particularly important for the init call, where upstream components aren't run.
This commit is contained in:
parent
883c137b26
commit
a7d9c8156d
|
@ -145,6 +145,7 @@ def span_embeddings_forward(
|
|||
|
||||
tokvecs, docs = inputs
|
||||
|
||||
#TODO fix this
|
||||
dim = tokvecs[0].shape[1]
|
||||
|
||||
get_mentions = model.attrs["get_mentions"]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from thinc.types import Ints2d
|
||||
from spacy.tokens import Doc
|
||||
from typing import List, Tuple, Callable
|
||||
from typing import List, Tuple, Callable, Any
|
||||
from ...util import registry
|
||||
|
||||
# type alias to make writing this less tedious
|
||||
|
@ -109,13 +109,18 @@ def get_predicted_clusters(
|
|||
def get_sentence_map(doc: Doc):
|
||||
"""For the given span, return a list of sentence indexes."""
|
||||
|
||||
si = 0
|
||||
out = []
|
||||
for sent in doc.sents:
|
||||
for tok in sent:
|
||||
out.append(si)
|
||||
si += 1
|
||||
return out
|
||||
try:
|
||||
si = 0
|
||||
out = []
|
||||
for sent in doc.sents:
|
||||
for tok in sent:
|
||||
out.append(si)
|
||||
si += 1
|
||||
return out
|
||||
except ValueError:
|
||||
# If there are no sents then just return dummy values.
|
||||
# Shouldn't happen in general training, but typical in init.
|
||||
return [0] * len(doc)
|
||||
|
||||
|
||||
def get_candidate_mentions(
|
||||
|
@ -144,7 +149,7 @@ def get_candidate_mentions(
|
|||
|
||||
|
||||
@registry.misc("spacy.CorefCandidateGenerator.v0")
|
||||
def create_mention_generator() -> Callable:
|
||||
def create_mention_generator() -> Any:
|
||||
return get_candidate_mentions
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user