mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
Add spans to doc.to_json (#10073)
* Add spans to to_json
* adjustments to_json
* Change docstring
* change doc key naming
* Update spacy/tokens/doc.pyx

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
23bc93d3d2
commit
b68bf43f5b
|
@ -1,5 +1,5 @@
|
|||
import pytest
|
||||
from spacy.tokens import Doc
|
||||
from spacy.tokens import Doc, Span
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
|
@ -60,3 +60,13 @@ def test_doc_to_json_underscore_error_serialize(doc):
|
|||
Doc.set_extension("json_test4", method=lambda doc: doc.text)
|
||||
with pytest.raises(ValueError):
|
||||
doc.to_json(underscore=["json_test4"])
|
||||
|
||||
|
||||
def test_doc_to_json_span(doc):
    """Test that Doc.to_json() includes spans.

    Two spans are stored under the "test" key of doc.spans. The JSON output
    must contain a "spans" dict with exactly that one group, holding one
    entry per span in insertion order. Each entry is compared with the Span
    it was built from, so that the character offsets, label and KB id are all
    verified.
    """
    spans = [Span(doc, 0, 2, "test"), Span(doc, 0, 1, "test")]
    doc.spans["test"] = spans
    json_doc = doc.to_json()
    assert "spans" in json_doc
    assert len(json_doc["spans"]) == 1
    assert len(json_doc["spans"]["test"]) == 2
    assert json_doc["spans"]["test"][0]["start"] == 0
    # Both spans start at token 0 / character 0, so the "start" check above
    # cannot tell token indices from character offsets. Compare every field
    # against the source Span to pin the serialized values down.
    for span, span_data in zip(spans, json_doc["spans"]["test"]):
        assert span_data["start"] == span.start_char
        assert span_data["end"] == span.end_char
        assert span_data["label"] == span.label_
        assert span_data["kb_id"] == span.kb_id_
|
||||
|
|
|
@ -1457,7 +1457,7 @@ cdef class Doc:
|
|||
underscore (list): Optional list of string names of custom doc._.
|
||||
attributes. Attribute values need to be JSON-serializable. Values will
|
||||
be added to an "_" key in the data, e.g. "_": {"foo": "bar"}.
|
||||
RETURNS (dict): The data in spaCy's JSON format.
|
||||
RETURNS (dict): The data in JSON format.
|
||||
"""
|
||||
data = {"text": self.text}
|
||||
if self.has_annotation("ENT_IOB"):
|
||||
|
@ -1486,6 +1486,15 @@ cdef class Doc:
|
|||
token_data["dep"] = token.dep_
|
||||
token_data["head"] = token.head.i
|
||||
data["tokens"].append(token_data)
|
||||
|
||||
if self.spans:
|
||||
data["spans"] = {}
|
||||
for span_group in self.spans:
|
||||
data["spans"][span_group] = []
|
||||
for span in self.spans[span_group]:
|
||||
span_data = {"start": span.start_char, "end": span.end_char, "label": span.label_, "kb_id": span.kb_id_}
|
||||
data["spans"][span_group].append(span_data)
|
||||
|
||||
if underscore:
|
||||
data["_"] = {}
|
||||
for attr in underscore:
|
||||
|
|
Loading…
Reference in New Issue
Block a user