mirror of https://github.com/explosion/spaCy.git, synced 2025-02-10 16:40:34 +03:00
Add NER data test
This commit is contained in:
parent 14bb102a6b
commit 2d5f5cb5fb
@@ -3,6 +3,7 @@ from spacy.gold import biluo_tags_from_offsets, offsets_from_biluo_tags
 from spacy.gold import spans_from_biluo_tags, iob_to_biluo, align
 from spacy.gold import Corpus, docs_to_json
 from spacy.gold.example import Example
+from spacy.gold.converters import json2docs
 from spacy.lang.en import English
 from spacy.syntax.nonproj import is_nonproj_tree
 from spacy.tokens import Doc, DocBin
@@ -152,6 +153,77 @@ def test_gold_biluo_misalign(en_vocab):
     assert tags == ["O", "O", "O", "-", "-", "-"]
 
 
+def test_example_from_dict_no_ner(en_vocab):
+    words = ["a", "b", "c", "d"]
+    spaces = [True, True, False, True]
+    predicted = Doc(en_vocab, words=words, spaces=spaces)
+    example = Example.from_dict(predicted, {"words": words})
+    ner_tags = example.get_aligned_ner()
+    assert ner_tags == [None, None, None, None]
+
+
+def test_json2docs_no_ner(en_vocab):
+    data = [{
+        "id": 1,
+        "paragraphs": [
+            {
+                "sentences": [
+                    {
+                        "tokens": [
+                            {
+                                "dep": "nn",
+                                "head": 1,
+                                "tag": "NNP",
+                                "orth": "Ms."
+                            },
+                            {
+                                "dep": "nsubj",
+                                "head": 1,
+                                "tag": "NNP",
+                                "orth": "Haag"
+                            },
+                            {
+                                "dep": "ROOT",
+                                "head": 0,
+                                "tag": "VBZ",
+                                "orth": "plays"
+                            },
+                            {
+                                "dep": "dobj",
+                                "head": -1,
+                                "tag": "NNP",
+                                "orth": "Elianti"
+                            },
+                            {
+                                "dep": "punct",
+                                "head": -2,
+                                "tag": ".",
+                                "orth": "."
+                            }
+                        ]
+                    }
+                ]
+            }
+        ]
+    }]
+    docs = json2docs(data)
+    assert len(docs) == 1
+    for doc in docs:
+        assert not doc.is_nered
+        for token in doc:
+            assert token.ent_iob == 0
+        eg = Example(
+            Doc(
+                doc.vocab,
+                words=[w.text for w in doc],
+                spaces=[bool(w.whitespace_) for w in doc]
+            ),
+            doc
+        )
+        ner_tags = eg.get_aligned_ner()
+        assert ner_tags == [None, None, None, None, None]
+
+
 def test_split_sentences(en_vocab):
     words = ["I", "flew", "to", "San Francisco Valley", "had", "loads of fun"]
     doc = Doc(en_vocab, words=words)
@@ -504,6 +576,7 @@ def test_tuple_format_implicit_invalid():
     _train(train_data)
 
 
+
 def _train(train_data):
     nlp = English()
     ner = nlp.create_pipe("ner")
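Taken together, the new tests pin down how missing NER annotation is represented after alignment: absent entity data yields None tags (annotation missing) rather than "O" (annotated as outside any entity). A minimal standalone sketch of what the first test exercises, assuming the development-branch spacy.gold API this diff targets (these modules later moved to spacy.training in released spaCy v3):

    from spacy.gold.example import Example
    from spacy.lang.en import English
    from spacy.tokens import Doc

    nlp = English()
    words = ["a", "b", "c", "d"]
    predicted = Doc(nlp.vocab, words=words)

    # No "entities" key in the reference dict, so NER is treated as
    # missing and every aligned tag comes back as None rather than "O".
    example = Example.from_dict(predicted, {"words": words})
    print(example.get_aligned_ner())  # [None, None, None, None]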
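The same invariant for the converter path, sketched under the same API assumption: converting v2-style training JSON that carries no entity annotations should produce Docs where every token's ent_iob stays 0 (unset/missing), not 2 ("O"):

    from spacy.gold.converters import json2docs

    # A single-token paragraph with dependency and tag annotation only.
    data = [{"id": 1, "paragraphs": [{"sentences": [{"tokens": [
        {"dep": "ROOT", "head": 0, "tag": "VBZ", "orth": "plays"}
    ]}]}]}]
    docs = json2docs(data)
    assert not docs[0].is_nered  # no NER annotation present
    assert all(token.ent_iob == 0 for token in docs[0])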