Auto-format [ci skip]

parent 73dc63d3bf
commit cc05d9dad6

@@ -57,7 +57,7 @@ def convert(
     is written to stdout, so you can pipe them forward to a JSON file:
     $ spacy convert some_file.conllu > some_file.json
     """
-    no_print = (output_dir == "-")
+    no_print = output_dir == "-"
     msg = Printer(no_print=no_print)
     input_path = Path(input_file)
     if file_type not in FILE_TYPES:
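
The hunk above relies on two conventions worth spelling out: "-" as the output_dir means "write to stdout" (so the converted data can be piped into a file, as the docstring shows), and no_print silences the pretty-printer so only the converted data reaches stdout. A minimal sketch of the "-" convention, using a hypothetical write_output helper that is not part of spaCy:

    import sys

    def write_output(data, output_dir):
        # "-" is the usual CLI shorthand for "print to stdout"
        if output_dir == "-":
            sys.stdout.write(data)
        else:
            with open(output_dir, "w", encoding="utf8") as f:
                f.write(data)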

@@ -9,7 +9,9 @@ from ...tokens.doc import Doc
 from ...util import load_model
 
 
-def conll_ner2json(input_data, n_sents=10, seg_sents=False, model=None, no_print=False, **kwargs):
+def conll_ner2json(
+    input_data, n_sents=10, seg_sents=False, model=None, no_print=False, **kwargs
+):
     """
     Convert files in the CoNLL-2003 NER format and similar
     whitespace-separated columns into JSON format for use with train cli.
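
The function above reads whitespace-separated CoNLL-2003-style NER data (one token per line, NER tag in the last column, blank lines between sentences) and emits spaCy's JSON training format. A hedged usage sketch; the import path, the sample data, and the shape of the returned structure are assumptions based on this diff rather than a checked API reference:

    from spacy.cli.converters import conll_ner2json

    conll_sample = (
        "-DOCSTART- -X- O O\n"
        "\n"
        "U.N. NNP I-NP I-ORG\n"
        "official NN I-NP O\n"
        "Ekeus NNP I-NP I-PER\n"
        "heads VBZ I-VP O\n"
        "for IN I-PP O\n"
        "Baghdad NNP I-NP I-LOC\n"
        ". . O O\n"
    )

    docs = conll_ner2json(conll_sample, n_sents=10, seg_sents=False)
    print(docs[0])  # one JSON-serializable document dict per group of sentences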

@@ -297,8 +297,10 @@ def test_issue3410():
 def test_issue3412():
     data = numpy.asarray([[0, 0, 0], [1, 2, 3], [9, 8, 7]], dtype="f")
     vectors = Vectors(data=data)
-    keys, best_rows, scores = vectors.most_similar(numpy.asarray([[9, 8, 7], [0, 0, 0]], dtype="f"))
-    assert(best_rows[0] == 2)
+    keys, best_rows, scores = vectors.most_similar(
+        numpy.asarray([[9, 8, 7], [0, 0, 0]], dtype="f")
+    )
+    assert best_rows[0] == 2
 
 
 def test_issue3447():
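
Why the test above expects best_rows[0] == 2: the first query vector [9, 8, 7] is identical to row 2 of the vector table, so it is its own nearest neighbour under cosine similarity, which is essentially what Vectors.most_similar ranks by. A plain-numpy sketch of that check (not spaCy code):

    import numpy

    data = numpy.asarray([[0, 0, 0], [1, 2, 3], [9, 8, 7]], dtype="f")
    query = numpy.asarray([9, 8, 7], dtype="f")

    norms = numpy.linalg.norm(data, axis=1)
    norms[norms == 0] = 1.0  # guard against the all-zero row
    scores = data @ query / (norms * numpy.linalg.norm(query))
    print(int(scores.argmax()))  # 2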

@@ -90,7 +90,7 @@ def test_gold_ner_missing_tags(en_tokenizer):
 def test_iob_to_biluo():
     good_iob = ["O", "O", "B-LOC", "I-LOC", "O", "B-PERSON"]
     good_biluo = ["O", "O", "B-LOC", "L-LOC", "O", "U-PERSON"]
-    bad_iob = ["O", "O", "\"", "B-LOC", "I-LOC"]
+    bad_iob = ["O", "O", '"', "B-LOC", "I-LOC"]
     converted_biluo = iob_to_biluo(good_iob)
     assert good_biluo == converted_biluo
     with pytest.raises(ValueError):
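
The test above exercises iob_to_biluo, which upgrades IOB tags to the BILUO scheme (closing multi-token entities with L- and marking single-token entities with U-) and raises ValueError for sequences that are not valid IOB, such as the bare '"' tag in bad_iob. A hedged sketch; the spacy.gold import path is an assumption, since the import is not shown in this diff:

    from spacy.gold import iob_to_biluo

    print(iob_to_biluo(["O", "B-LOC", "I-LOC", "O", "B-PERSON"]))
    # ['O', 'B-LOC', 'L-LOC', 'O', 'U-PERSON']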

@@ -99,9 +99,9 @@ def test_iob_to_biluo():
 
 def test_roundtrip_docs_to_json():
     text = "I flew to Silicon Valley via London."
-    tags = ['PRP', 'VBD', 'IN', 'NNP', 'NNP', 'IN', 'NNP', '.']
+    tags = ["PRP", "VBD", "IN", "NNP", "NNP", "IN", "NNP", "."]
     heads = [1, 1, 1, 4, 2, 1, 5, 1]
-    deps = ['nsubj', 'ROOT', 'prep', 'compound', 'pobj', 'prep', 'pobj', 'punct']
+    deps = ["nsubj", "ROOT", "prep", "compound", "pobj", "prep", "pobj", "punct"]
     biluo_tags = ["O", "O", "O", "B-LOC", "L-LOC", "O", "U-GPE", "O"]
     cats = {"TRAVEL": 1.0, "BAKING": 0.0}
     nlp = English()
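
For reference, the BILUO tags in the test data above line up one-to-one with the tokens of the example sentence: "Silicon Valley" is a two-token LOC span (B-LOC, L-LOC) and "London" is a single-token GPE (U-GPE). A small sketch of that alignment (plain Python, no spaCy required):

    tokens = ["I", "flew", "to", "Silicon", "Valley", "via", "London", "."]
    biluo_tags = ["O", "O", "O", "B-LOC", "L-LOC", "O", "U-GPE", "O"]
    for token, tag in zip(tokens, biluo_tags):
        print(f"{token:<10}{tag}")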