2017-01-14 23:30:36 +03:00
|
|
|
# coding: utf-8
|
|
|
|
from __future__ import unicode_literals, print_function
|
2015-10-25 15:15:51 +03:00
|
|
|
|
2018-03-27 20:23:02 +03:00
|
|
|
import contextlib
|
|
|
|
from pathlib import Path
|
2019-09-29 18:32:12 +03:00
|
|
|
from fabric.api import local, lcd, env, settings, prefix
|
2017-01-14 23:30:36 +03:00
|
|
|
from os import path, environ
|
2018-03-27 20:23:02 +03:00
|
|
|
import shutil
|
2018-05-01 18:51:22 +03:00
|
|
|
import sys
|
2014-09-11 14:28:38 +04:00
|
|
|
|
2015-01-03 13:02:21 +03:00
|
|
|
|
|
|
|
# Directory that contains this fabfile.
PWD = path.dirname(__file__)
# Virtualenv directory name; the VENV_DIR environment variable overrides '.env'.
ENV = environ.get('VENV_DIR', '.env')
# Location of the virtualenv, anchored at the fabfile's directory.
VENV_DIR = Path(PWD).joinpath(ENV)
|
2015-01-03 13:02:21 +03:00
|
|
|
|
|
|
|
|
2018-03-27 20:23:02 +03:00
|
|
|
@contextlib.contextmanager
def virtualenv(name, create=False, python='/usr/bin/python3.6'):
    """Yield a runner that executes shell commands inside the project venv.

    The environment used is always the module-level ``VENV_DIR``; ``name`` is
    accepted so existing call sites keep working but is not consulted.

    Args:
        name: Unused.
        create: When true, delete any existing ``VENV_DIR`` and rebuild it
            with ``<python> -m venv`` before yielding.
        python: Interpreter used to build the environment when ``create`` is
            set. The path is resolved before use.

    Yields:
        A callable ``wrapped_local(cmd, env_vars=None, capture=False,
        direct=False)`` that sources the environment's ``bin/activate`` under
        ``/bin/bash`` and then runs ``cmd`` through Fabric's ``local``,
        returning whatever ``local`` returns.
    """
    env_path = VENV_DIR
    if create:
        # Only resolve the interpreter when it is actually needed.
        python = Path(python).resolve()
        if env_path.exists():
            shutil.rmtree(str(env_path))
        local('{python} -m venv {env_path}'.format(python=python, env_path=VENV_DIR))

    def wrapped_local(cmd, env_vars=None, capture=False, direct=False):
        # ``env_vars`` and ``direct`` are accepted for compatibility with
        # existing callers but are not used. ``capture`` is forwarded so that
        # callers asking for the command output actually receive it.
        return local('source {}/bin/activate && {}'.format(env_path, cmd),
                     shell='/bin/bash', capture=capture)

    yield wrapped_local
|
|
|
|
|
|
|
|
|
2019-09-30 14:14:48 +03:00
|
|
|
def env(lang='python3.6'):
    """Recreate the virtualenv at ``VENV_DIR`` and install the build tooling.

    Any existing environment is removed first. Interpreters whose name starts
    with ``python3`` build the environment with the ``venv`` module; any other
    interpreter installs and uses ``virtualenv`` instead.

    Args:
        lang: Interpreter command used to create the environment.
    """
    if VENV_DIR.exists():
        local('rm -rf {env}'.format(env=VENV_DIR))

    if not lang.startswith('python3'):
        local('{lang} -m pip install virtualenv --no-cache-dir'.format(lang=lang))
        local('{lang} -m virtualenv {env} --no-cache-dir'.format(lang=lang, env=VENV_DIR))
    else:
        local('{lang} -m venv {env}'.format(lang=lang, env=VENV_DIR))

    # Commands executed, in order, inside the freshly created environment.
    setup_commands = (
        'pip install --upgrade setuptools --no-cache-dir',
        'pip install pytest --no-cache-dir',
        'pip install wheel --no-cache-dir',
        'pip install -r requirements.txt --no-cache-dir',
        'pip install pex --no-cache-dir',
    )
    with virtualenv(VENV_DIR) as run_in_venv:
        print(run_in_venv('python --version', capture=True))
        for command in setup_commands:
            run_in_venv(command)
|
|
|
|
|
2015-01-03 13:02:21 +03:00
|
|
|
|
|
|
|
|
|
|
|
def install():
    """Pip-install the source distribution(s) from ``dist/`` into the venv."""
    with virtualenv(VENV_DIR) as run_in_venv:
        command = 'pip install dist/*.tar.gz'
        run_in_venv(command)
|
2015-01-03 13:02:21 +03:00
|
|
|
|
2014-07-05 22:49:34 +04:00
|
|
|
|
|
|
|
def make():
    """Build the extension modules in place using the project virtualenv.

    The command runs from the directory containing this file, with
    ``PYTHONPATH`` exported as that working directory.
    """
    # Activate VENV_DIR rather than a hard-coded ".env" so that the VENV_DIR
    # environment-variable override is honoured here like in the other tasks.
    command = (
        'export PYTHONPATH=`pwd` && source {env}/bin/activate && '
        'python setup.py build_ext --inplace'
    ).format(env=VENV_DIR)
    with lcd(path.dirname(__file__)):
        local(command, shell='/bin/bash')
|
2015-01-03 13:02:21 +03:00
|
|
|
|
2017-11-07 14:11:08 +03:00
|
|
|
def sdist():
    """Build a source distribution from inside the project virtualenv.

    Upgrades ``setuptools`` in the environment first, then runs
    ``python setup.py sdist`` from the directory containing this file.
    """
    with virtualenv(VENV_DIR) as venv_local:
        with lcd(path.dirname(__file__)):
            # Use the venv-aware runner: plain ``local`` would run these
            # commands with whatever ``python`` is on PATH, outside the venv.
            venv_local('python -m pip install -U setuptools')
            venv_local('python setup.py sdist')
|
2015-01-03 13:02:21 +03:00
|
|
|
|
2018-03-27 20:23:02 +03:00
|
|
|
def wheel():
    """Build a wheel with the venv's Python from this file's directory."""
    project_dir = path.dirname(__file__)
    with virtualenv(VENV_DIR) as run_in_venv, lcd(project_dir):
        run_in_venv('python setup.py bdist_wheel')
|
2018-03-27 20:23:02 +03:00
|
|
|
|
|
|
|
def pex():
    """Bundle the built wheel(s) into ``dist/spacy-<short git sha>.pex``."""
    project_dir = path.dirname(__file__)
    with virtualenv(VENV_DIR) as run_in_venv, lcd(project_dir):
        # The short commit hash is read from inside the project directory.
        short_sha = local('git rev-parse --short HEAD', capture=True)
        pex_command = 'pex dist/*.whl -e spacy -o dist/spacy-%s.pex' % short_sha
        run_in_venv(pex_command, direct=True)
|
2018-03-27 20:23:02 +03:00
|
|
|
|
|
|
|
|
2015-01-03 13:02:21 +03:00
|
|
|
def clean():
    """Delete built wheels and pex files, then run ``setup.py clean --all``."""
    # NOTE(review): the venv step is assumed to run inside the lcd block,
    # since setup.py lives in the project directory - confirm against the
    # original indentation.
    removal_commands = ('rm -f dist/*.whl', 'rm -f dist/*.pex')
    with lcd(path.dirname(__file__)):
        for command in removal_commands:
            local(command)
        with virtualenv(VENV_DIR) as run_in_venv:
            run_in_venv('python setup.py clean --all')
|
2015-01-03 13:02:21 +03:00
|
|
|
|
2014-07-05 22:49:34 +04:00
|
|
|
|
2015-01-03 13:02:21 +03:00
|
|
|
def test():
    """Run ``pytest -x spacy/tests`` inside the venv from this file's directory."""
    project_dir = path.dirname(__file__)
    with virtualenv(VENV_DIR) as run_in_venv, lcd(project_dir):
        run_in_venv('pytest -x spacy/tests')
|
2018-03-27 20:23:02 +03:00
|
|
|
|
|
|
|
def train():
    """Run ``spacy train`` in the venv with args from ``SPACY_TRAIN_ARGS``.

    When the environment variable is unset, no extra arguments are passed.
    """
    extra_args = environ.get('SPACY_TRAIN_ARGS', '')
    command = 'spacy train {args}'.format(args=extra_args)
    with virtualenv(VENV_DIR) as run_in_venv:
        run_in_venv(command)
|
2018-05-01 18:51:22 +03:00
|
|
|
|
|
|
|
|
2019-09-30 14:14:48 +03:00
|
|
|
def conll17(treebank_dir, experiment_dir, vectors_dir, config, corpus=''):
    """Train and test UD models for a CoNLL 2017 experiment.

    The run is stored in a sub-directory of ``experiment_dir`` named from the
    first six characters of the config's ``sha256sum`` output and the short
    git commit hash. The config file is copied there as ``config.json``.

    Args:
        treebank_dir: Directory of treebanks; must contain a
            ``ud-test-v2.0-conll2017`` sub-directory.
        experiment_dir: Parent directory for the per-run output directory.
        vectors_dir: Passed to ``spacy ud-train`` via ``-v``.
        config: Path to the configuration file.
        corpus: A single corpus name to process. When empty, a built-in list
            of four corpora is used.

    Exits with status 1 if the git working tree has uncommitted changes or
    the test data directory is missing.
    """
    is_not_clean = local('git status --porcelain', capture=True)
    if is_not_clean:
        print("Repository is not clean")
        print(is_not_clean)
        sys.exit(1)

    # Validate the input explicitly (asserts vanish under ``python -O``) and
    # do it before anything is created on disk.
    test_data_dir = Path(treebank_dir) / 'ud-test-v2.0-conll2017'
    if not test_data_dir.is_dir():
        print("Test data directory not found: {}".format(test_data_dir))
        sys.exit(1)

    git_sha = local('git rev-parse --short HEAD', capture=True)
    config_checksum = local('sha256sum {config}'.format(config=config), capture=True)
    experiment_dir = Path(experiment_dir) / '{}--{}'.format(config_checksum[:6], git_sha)
    if not experiment_dir.exists():
        experiment_dir.mkdir()

    if corpus:
        corpora = [corpus]
    else:
        corpora = ['UD_English', 'UD_Chinese', 'UD_Japanese', 'UD_Vietnamese']

    local('cp {config} {experiment_dir}/config.json'.format(config=config, experiment_dir=experiment_dir))
    with virtualenv(VENV_DIR) as venv_local:
        # A distinct loop name avoids shadowing the ``corpus`` parameter.
        for corpus_name in corpora:
            venv_local('spacy ud-train {treebank_dir} {experiment_dir} {config} {corpus} -v {vectors_dir}'.format(
                treebank_dir=treebank_dir, experiment_dir=experiment_dir, config=config,
                corpus=corpus_name, vectors_dir=vectors_dir))
            venv_local('spacy ud-run-test {test_data_dir} {experiment_dir} {corpus}'.format(
                test_data_dir=test_data_dir, experiment_dir=experiment_dir, corpus=corpus_name))
|