mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-18 20:22:25 +03:00
Remove span2head
This doesn't work as a component because it needs to modify gold data, so instead it's a conversion script (in another repo).
This commit is contained in:
parent
0522a43116
commit
17d017a177
|
@ -91,32 +91,6 @@ DEFAULT_MODEL = Config().from_str(default_config)["model"]
|
||||||
|
|
||||||
# Default key prefix for coreference cluster span groups.
# NOTE(review): presumably the clusters are stored in doc.spans under
# "<prefix>_<n>" keys -- confirm against the coref component.
DEFAULT_CLUSTERS_PREFIX = "coref_clusters"
|
DEFAULT_CLUSTERS_PREFIX = "coref_clusters"
|
||||||
|
|
||||||
@Language.component("span2head")
|
|
||||||
def make_head_only_clusters(doc, old_key="coref_clusters", new_key="coref_head_clusters"):
    """Create coref head clusters from span clusters.

    Every span group in ``doc.spans`` whose key starts with ``"<old_key>_"``
    is reduced to the one-token spans of its members' root tokens
    (``span.root``), de-duplicated and ordered by token index. Clusters that
    collapse to a single head are dropped.

    The old clusters are left alone, and the new clusters are added under a
    different key: ``"<new_key>_0"``, ``"<new_key>_1"``, ... numbered over
    the clusters that were kept.

    doc: the document whose ``spans`` mapping is read and extended in place.
    old_key: key prefix of the existing span clusters.
    new_key: key prefix under which the head clusters are stored.
    RETURNS: the same ``doc`` object.
    """
    # This must be an f-string: the plain literal "{old_key}_" never matches
    # a real key, which made the function silently produce no clusters.
    prefix = f"{old_key}_"

    head_clusters = []
    for key, span_group in doc.spans.items():
        if not key.startswith(prefix):
            continue

        # Several mentions can share a root token, so de-duplicate first.
        head_indices = sorted({span.root.i for span in span_group})

        # singletons are skipped
        if len(head_indices) > 1:
            head_clusters.append([doc[idx:idx + 1] for idx in head_indices])

    # Add the new span groups only after the loop, so doc.spans is not
    # modified while it is being iterated.
    for ii, spans in enumerate(head_clusters):
        doc.spans[f"{new_key}_{ii}"] = spans
    return doc
|
|
||||||
|
|
||||||
@Language.factory(
|
@Language.factory(
|
||||||
"coref",
|
"coref",
|
||||||
assigns=["doc.spans"],
|
assigns=["doc.spans"],
|
||||||
|
|
Loading…
Reference in New Issue
Block a user