Add NER data test

commit 2d5f5cb5fb
parent 14bb102a6b
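
The commit adds two tests covering training data that carries no NER annotations: test_example_from_dict_no_ner checks that an Example built from a dict without an entity key yields None (missing) alignment tags from get_aligned_ner(), and test_json2docs_no_ner checks the same for docs produced from the JSON training format via the newly imported json2docs converter.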
@@ -3,6 +3,7 @@ from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags
 from spacy.gold import spans_from_biluo_tags, iob_to_biluo, align
 from spacy.gold import Corpus, docs_to_json
 from spacy.gold.example import Example
+from spacy.gold.converters import json2docs
 from spacy.lang.en import English
 from spacy.syntax.nonproj import is_nonproj_tree
 from spacy.tokens import Doc, DocBin
@@ -152,6 +153,77 @@ def test_gold_biluo_misalign(en_vocab):
     assert tags == ["O", "O", "O", "-", "-", "-"]
+
+
+def test_example_from_dict_no_ner(en_vocab):
+    words = ["a", "b", "c", "d"]
+    spaces = [True, True, False, True]
+    predicted = Doc(en_vocab, words=words, spaces=spaces)
+    example = Example.from_dict(predicted, {"words": words})
+    ner_tags = example.get_aligned_ner()
+    assert ner_tags == [None, None, None, None]
+
+
+def test_json2docs_no_ner(en_vocab):
+    data = [{
+        "id":1,
+        "paragraphs":[
+            {
+                "sentences":[
+                    {
+                        "tokens":[
+                            {
+                                "dep":"nn",
+                                "head":1,
+                                "tag":"NNP",
+                                "orth":"Ms."
+                            },
+                            {
+                                "dep":"nsubj",
+                                "head":1,
+                                "tag":"NNP",
+                                "orth":"Haag"
+                            },
+                            {
+                                "dep":"ROOT",
+                                "head":0,
+                                "tag":"VBZ",
+                                "orth":"plays"
+                            },
+                            {
+                                "dep":"dobj",
+                                "head":-1,
+                                "tag":"NNP",
+                                "orth":"Elianti"
+                            },
+                            {
+                                "dep":"punct",
+                                "head":-2,
+                                "tag":".",
+                                "orth":"."
+                            }
+                        ]
+                    }
+                ]
+            }
+        ]
+    }]
+    docs = json2docs(data)
+    assert len(docs) == 1
+    for doc in docs:
+        assert not doc.is_nered
+        for token in doc:
+            assert token.ent_iob == 0
+        eg = Example(
+            Doc(
+                doc.vocab,
+                words=[w.text for w in doc],
+                spaces=[bool(w.whitespace_) for w in doc]
+            ),
+            doc
+        )
+        ner_tags = eg.get_aligned_ner()
+        assert ner_tags == [None, None, None, None, None]
 
 
 def test_split_sentences(en_vocab):
     words = ["I", "flew", "to", "San Francisco Valley", "had", "loads of fun"]
     doc = Doc(en_vocab, words=words)
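
The point both tests pin down is the difference between missing NER annotation and an explicit "O" tag: on a Token, ent_iob == 0 means no entity tag is set at all (2 would be an explicit "O"), and get_aligned_ner() reports missing annotation as None rather than "O". A minimal standalone sketch of the same behavior, assuming the v3-development spacy.gold API used above:

    from spacy.gold.example import Example
    from spacy.tokens import Doc
    from spacy.vocab import Vocab

    words = ["a", "b", "c", "d"]
    predicted = Doc(Vocab(), words=words)

    # No "entities" key in the reference dict: the aligned NER tags come
    # back as None placeholders (missing), not as explicit "O" tags.
    example = Example.from_dict(predicted, {"words": words})
    assert example.get_aligned_ner() == [None, None, None, None]

    # ent_iob == 0 means "no entity tag set"; a token annotated as
    # "outside any entity" would have ent_iob == 2 instead.
    assert all(token.ent_iob == 0 for token in predicted)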
@@ -504,6 +576,7 @@ def test_tuple_format_implicit_invalid():
     _train(train_data)
 
 
+
 def _train(train_data):
     nlp = English()
     ner = nlp.create_pipe("ner")
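
To exercise just the new tests, pytest's keyword filter can be used; the test module path below is an assumption, not part of the diff:

    # Run only the new no-NER tests (path assumed; adjust to the actual
    # location of this test module in the spaCy tree).
    import pytest

    pytest.main(["spacy/tests", "-k", "no_ner", "-q"])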