spaCy/spacy/tests/regression/test_issue1622.py

# coding: utf-8
from __future__ import unicode_literals
import json
from tempfile import NamedTemporaryFile
import pytest

from ...cli.train import train


@pytest.mark.xfail(
    reason="MemoryError on Python 2 on Windows; needs further investigation"
)
def test_cli_trained_model_can_be_saved(tmpdir):
    """Regression test for issue #1622: run the ``train`` CLI entry point once.

    A one-sentence Dutch corpus is serialised to a JSON file inside pytest's
    ``tmpdir`` and used as both the training and the development data for a
    single training iteration. The same directory is passed as the output
    directory.

    The test has no explicit assertion: it passes when ``train`` returns
    without raising.

    Args:
        tmpdir: pytest's built-in per-test temporary directory fixture.
    """
    cmd = None
    lang = 'nl'
    output_dir = str(tmpdir)

    # NOTE(review): "head" values look like offsets relative to the token's
    # own position (0 on the ROOT token) -- confirm against the corpus format.
    train_corpus = [
        {
            "id": "identifier_0",
            "paragraphs": [
                {
                    "raw": "Jan houdt van Marie.\n",
                    "sentences": [
                        {
                            "tokens": [
                                {
                                    "id": 0,
                                    "dep": "nsubj",
                                    "head": 1,
                                    "tag": "NOUN",
                                    "orth": "Jan",
                                    "ner": "B-PER"
                                },
                                {
                                    "id": 1,
                                    "dep": "ROOT",
                                    "head": 0,
                                    "tag": "VERB",
                                    "orth": "houdt",
                                    "ner": "O"
                                },
                                {
                                    "id": 2,
                                    "dep": "case",
                                    "head": 1,
                                    "tag": "ADP",
                                    "orth": "van",
                                    "ner": "O"
                                },
                                {
                                    "id": 3,
                                    "dep": "obj",
                                    "head": -2,
                                    "tag": "NOUN",
                                    "orth": "Marie",
                                    "ner": "B-PER"
                                },
                                {
                                    "id": 4,
                                    "dep": "punct",
                                    "head": -3,
                                    "tag": "PUNCT",
                                    "orth": ".",
                                    "ner": "O"
                                },
                                {
                                    "id": 5,
                                    "dep": "",
                                    "head": -1,
                                    "tag": "SPACE",
                                    "orth": "\n",
                                    "ner": "O"
                                }
                            ],
                            "brackets": []
                        }
                    ]
                }
            ]
        }
    ]

    # delete=False keeps the file on disk after the handle is closed so that
    # train() can reopen it by name; the ``with`` block guarantees the handle
    # is closed (and the data flushed) even if serialisation or writing fails.
    with NamedTemporaryFile('wb', dir=output_dir, delete=False) as train_file:
        train_file.write(json.dumps(train_corpus).encode('utf-8'))
    train_data = train_file.name
    # The same file doubles as the development set.
    dev_data = train_data

    # Roughly the programmatic form of:
    #   spacy train -n 1 -g -1 nl output_nl training_corpus.json \
    #       training_corpus.json
    # Completing without an exception is the success condition.
    train(cmd, lang, output_dir, train_data, dev_data, n_iter=1)
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00			`# coding: utf-8`
			`from __future__ import unicode_literals`
			`import json`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`from tempfile import NamedTemporaryFile`
Fix missing import 2017-12-22 18:21:44 +03:00			`import pytest`
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00
			`from ...cli.train import train`


xfail test that causes MemoryError on Python 2 on Windows Need to investigate this further! 2017-12-22 18:00:58 +03:00			`@pytest.mark.xfail`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`def test_cli_trained_model_can_be_saved(tmpdir):`
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00			`cmd = None`
			`lang = 'nl'`
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`output_dir = str(tmpdir)`
			`train_file = NamedTemporaryFile('wb', dir=output_dir, delete=False)`
			`train_corpus = [`
			`{`
			`"id": "identifier_0",`
			`"paragraphs": [`
			`{`
			`"raw": "Jan houdt van Marie.\n",`
			`"sentences": [`
			`{`
			`"tokens": [`
			`{`
			`"id": 0,`
			`"dep": "nsubj",`
			`"head": 1,`
			`"tag": "NOUN",`
			`"orth": "Jan",`
			`"ner": "B-PER"`
			`},`
			`{`
			`"id": 1,`
			`"dep": "ROOT",`
			`"head": 0,`
			`"tag": "VERB",`
			`"orth": "houdt",`
			`"ner": "O"`
			`},`
			`{`
			`"id": 2,`
			`"dep": "case",`
			`"head": 1,`
			`"tag": "ADP",`
			`"orth": "van",`
			`"ner": "O"`
			`},`
			`{`
			`"id": 3,`
			`"dep": "obj",`
			`"head": -2,`
			`"tag": "NOUN",`
			`"orth": "Marie",`
			`"ner": "B-PER"`
			`},`
			`{`
			`"id": 4,`
			`"dep": "punct",`
			`"head": -3,`
			`"tag": "PUNCT",`
			`"orth": ".",`
			`"ner": "O"`
			`},`
			`{`
			`"id": 5,`
			`"dep": "",`
			`"head": -1,`
			`"tag": "SPACE",`
			`"orth": "\n",`
			`"ner": "O"`
			`}`
			`],`
			`"brackets": []`
			`}`
			`]`
			`}`
			`]`
			`}`
			`]`
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`train_file.write(json.dumps(train_corpus).encode('utf-8'))`
			`train_file.close()`
			`train_data = train_file.name`
			`dev_data = train_data`
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`# spacy train -n 1 -g -1 nl output_nl training_corpus.json training \`
			`# corpus.json`
			`train(cmd, lang, output_dir, train_data, dev_data, n_iter=1)`
Adds regression test_issue1622 2017-12-11 00:20:12 +03:00
Switch from python 3 only TemporaryDirectory to pytest's tmpdir 2017-12-11 02:16:04 +03:00			`assert True`