spaCy/spacy/tests/regression/_test_issue1622.py

# coding: utf-8
from __future__ import unicode_literals

import json
from tempfile import NamedTemporaryFile

from spacy.cli.train import train


def test_cli_trained_model_can_be_saved(tmpdir):
    """Regression test for issue 1622: train for one iteration on a tiny corpus.

    A single-sentence Dutch corpus is serialised to a JSON file inside
    ``tmpdir`` and passed to ``train`` as both the training and the
    development data, with ``tmpdir`` itself as the output directory.

    There is no explicit assertion on the output: the test passes when
    ``train`` returns without raising.

    tmpdir: pytest's temporary-directory fixture; used for the corpus file
        and as the output directory handed to ``train``.
    """
    lang = "nl"
    output_dir = str(tmpdir)
    train_corpus = [
        {
            "id": "identifier_0",
            "paragraphs": [
                {
                    "raw": "Jan houdt van Marie.\n",
                    "sentences": [
                        {
                            "tokens": [
                                {
                                    "id": 0,
                                    "dep": "nsubj",
                                    "head": 1,
                                    "tag": "NOUN",
                                    "orth": "Jan",
                                    "ner": "B-PER",
                                },
                                {
                                    "id": 1,
                                    "dep": "ROOT",
                                    "head": 0,
                                    "tag": "VERB",
                                    "orth": "houdt",
                                    "ner": "O",
                                },
                                {
                                    "id": 2,
                                    "dep": "case",
                                    "head": 1,
                                    "tag": "ADP",
                                    "orth": "van",
                                    "ner": "O",
                                },
                                {
                                    "id": 3,
                                    "dep": "obj",
                                    "head": -2,
                                    "tag": "NOUN",
                                    "orth": "Marie",
                                    "ner": "B-PER",
                                },
                                {
                                    "id": 4,
                                    "dep": "punct",
                                    "head": -3,
                                    "tag": "PUNCT",
                                    "orth": ".",
                                    "ner": "O",
                                },
                                {
                                    "id": 5,
                                    "dep": "",
                                    "head": -1,
                                    "tag": "SPACE",
                                    "orth": "\n",
                                    "ner": "O",
                                },
                            ],
                            "brackets": [],
                        }
                    ],
                }
            ],
        }
    ]

    # delete=False keeps the file on disk once the handle is closed, so that
    # train() can reopen it by path. The with-block guarantees the handle is
    # closed even if serialising or writing the corpus raises.
    with NamedTemporaryFile("wb", dir=output_dir, delete=False) as train_file:
        train_file.write(json.dumps(train_corpus).encode("utf-8"))
    train_data = train_file.name
    # The same file doubles as the development set.
    dev_data = train_data

    # Mirrors the command line:
    # spacy train -n 1 -g -1 nl output_nl training_corpus.json training_corpus.json
    train(lang, output_dir, train_data, dev_data, n_iter=1)
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00			`# coding: utf-8`
			`from __future__ import unicode_literals`
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00			`import json`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`from tempfile import NamedTemporaryFile`
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00
Tidy up regression tests 2019-02-08 17:51:13 +03:00			`from spacy.cli.train import train`
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00

Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`def test_cli_trained_model_can_be_saved(tmpdir):`
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00			`lang = "nl"`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`output_dir = str(tmpdir)`
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00			`train_file = NamedTemporaryFile("wb", dir=output_dir, delete=False)`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`train_corpus = [`
			`{`
			`"id": "identifier_0",`
			`"paragraphs": [`
			`{`
			`"raw": "Jan houdt van Marie.\n",`
			`"sentences": [`
			`{`
			`"tokens": [`
			`{`
			`"id": 0,`
			`"dep": "nsubj",`
			`"head": 1,`
			`"tag": "NOUN",`
			`"orth": "Jan",`
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00			`"ner": "B-PER",`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`},`
			`{`
			`"id": 1,`
			`"dep": "ROOT",`
			`"head": 0,`
			`"tag": "VERB",`
			`"orth": "houdt",`
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00			`"ner": "O",`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`},`
			`{`
			`"id": 2,`
			`"dep": "case",`
			`"head": 1,`
			`"tag": "ADP",`
			`"orth": "van",`
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00			`"ner": "O",`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`},`
			`{`
			`"id": 3,`
			`"dep": "obj",`
			`"head": -2,`
			`"tag": "NOUN",`
			`"orth": "Marie",`
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00			`"ner": "B-PER",`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`},`
			`{`
			`"id": 4,`
			`"dep": "punct",`
			`"head": -3,`
			`"tag": "PUNCT",`
			`"orth": ".",`
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00			`"ner": "O",`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`},`
			`{`
			`"id": 5,`
			`"dep": "",`
			`"head": -1,`
			`"tag": "SPACE",`
			`"orth": "\n",`
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00			`"ner": "O",`
			`},`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`],`
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00			`"brackets": [],`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`}`
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00			`],`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`}`
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00			`],`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`}`
			`]`
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00
Tidy up and fix small bugs and typos 2019-02-08 16:14:49 +03:00			`train_file.write(json.dumps(train_corpus).encode("utf-8"))`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`train_file.close()`
			`train_data = train_file.name`
			`dev_data = train_data`
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`# spacy train -n 1 -g -1 nl output_nl training_corpus.json training \`
			`# corpus.json`
Remove dummy variable from function calls 2018-01-05 11:37:05 +03:00			`train(lang, output_dir, train_data, dev_data, n_iter=1)`
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`assert True`