Add NER data test

2025-08-02 11:20:19 +03:00 · 2020-06-24 15:49:25 +02:00 · 2020-06-24 15:49:25 +02:00 · 2d5f5cb5fb
commit 2d5f5cb5fb
parent 14bb102a6b
1 changed file with 73 additions and 0 deletions
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@ -3,6 +3,7 @@ from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags
 from spacy.gold import spans_from_biluo_tags, iob_to_biluo, align
 from spacy.gold import Corpus, docs_to_json
 from spacy.gold.example import Example
+from spacy.gold.converters import json2docs
 from spacy.lang.en import English
 from spacy.syntax.nonproj import is_nonproj_tree
 from spacy.tokens import Doc, DocBin
@ -152,6 +153,77 @@ def test_gold_biluo_misalign(en_vocab):
    assert tags == ["O", "O", "O", "-", "-", "-"]


+def test_example_from_dict_no_ner(en_vocab):
+    """An Example built from a words-only dict aligns every NER tag to None."""
+    tokens = ["a", "b", "c", "d"]
+    trailing_space = [True, True, False, True]
+    pred_doc = Doc(en_vocab, words=tokens, spaces=trailing_space)
+    eg = Example.from_dict(pred_doc, {"words": tokens})
+    assert eg.get_aligned_ner() == [None] * 4
+
+def test_json2docs_no_ner(en_vocab):
+    """Docs converted from JSON without NER data have no entity annotation."""
+    data = [
+        {
+            "id": 1,
+            "paragraphs": [
+                {
+                    "sentences": [
+                        {
+                            "tokens": [
+                                {
+                                    "dep": "nn",
+                                    "head": 1,
+                                    "tag": "NNP",
+                                    "orth": "Ms.",
+                                },
+                                {
+                                    "dep": "nsubj",
+                                    "head": 1,
+                                    "tag": "NNP",
+                                    "orth": "Haag",
+                                },
+                                {
+                                    "dep": "ROOT",
+                                    "head": 0,
+                                    "tag": "VBZ",
+                                    "orth": "plays",
+                                },
+                                {
+                                    "dep": "dobj",
+                                    "head": -1,
+                                    "tag": "NNP",
+                                    "orth": "Elianti",
+                                },
+                                {
+                                    "dep": "punct",
+                                    "head": -2,
+                                    "tag": ".",
+                                    "orth": ".",
+                                },
+                            ],
+                        },
+                    ],
+                },
+            ],
+        }
+    ]
+    converted = json2docs(data)
+    assert len(converted) == 1
+    json_doc = converted[0]
+    assert not json_doc.is_nered
+    for tok in json_doc:
+        assert tok.ent_iob == 0
+    # Build a fresh Doc with the same words and spacing, then pair it with json_doc.
+    texts = [tok.text for tok in json_doc]
+    has_space = [bool(tok.whitespace_) for tok in json_doc]
+    plain_doc = Doc(json_doc.vocab, words=texts, spaces=has_space)
+    eg = Example(plain_doc, json_doc)
+    aligned = eg.get_aligned_ner()
+    assert aligned == [None] * 5
+
+         
+
 def test_split_sentences(en_vocab):
    words = ["I", "flew", "to", "San Francisco Valley", "had", "loads of fun"]
    doc = Doc(en_vocab, words=words)
@ -504,6 +576,7 @@ def test_tuple_format_implicit_invalid():
        _train(train_data)


+
 def _train(train_data):
    nlp = English()
    ner = nlp.create_pipe("ner")