spaCy/fabfile.py

150 lines
4.8 KiB
Python
Raw Normal View History

import contextlib
from pathlib import Path
2019-12-25 19:59:52 +03:00
from fabric.api import local, lcd
2017-01-14 23:30:36 +03:00
from os import path, environ
import shutil
2018-05-01 18:51:22 +03:00
import sys
2015-01-03 13:02:21 +03:00
# Directory containing this fabfile; every task treats it as the project root.
PWD = path.dirname(__file__)
# Name (or path) of the virtualenv directory; the VENV_DIR environment
# variable overrides the default ".env".
ENV = environ.get("VENV_DIR", ".env")
# Location of the virtualenv used by all tasks.
VENV_DIR = Path(PWD) / ENV
2015-01-03 13:02:21 +03:00
@contextlib.contextmanager
def virtualenv(name, create=False, python="/usr/bin/python3.6"):
    """Yield a function that runs shell commands inside the project virtualenv.

    name: Accepted so existing call sites keep working, but not used; the
        environment is always located at VENV_DIR.
    create (bool): If true, remove any existing environment at VENV_DIR and
        create a fresh one with `<python> -m venv` before yielding.
    python: Interpreter used to create the environment when `create` is set.
    YIELDS (callable): `wrapped_local(cmd, env_vars=None, capture=False,
        direct=False)`, which sources the environment's activate script in a
        bash shell, runs `cmd`, and returns the result of `local`.
    """
    python = Path(python).resolve()
    env_path = VENV_DIR
    if create:
        if env_path.exists():
            shutil.rmtree(str(env_path))
        local("{python} -m venv {env_path}".format(python=python, env_path=env_path))

    def wrapped_local(cmd, env_vars=None, capture=False, direct=False):
        # `env_vars` and `direct` are accepted for call-site compatibility but
        # are not used. `capture` is forwarded so callers that ask for the
        # command's output actually receive it.
        return local(
            "source {}/bin/activate && {}".format(env_path, cmd),
            shell="/bin/bash",
            capture=capture,
        )

    yield wrapped_local
2019-09-29 18:32:35 +03:00
def env(lang="python3.6"):
    """Recreate the virtualenv at VENV_DIR and install the build tooling.

    lang: Interpreter command used to create the environment. Interpreters
        whose name starts with "python3" use the built-in `venv` module; any
        other interpreter installs and uses the `virtualenv` package.
    """
    if VENV_DIR.exists():
        local("rm -rf {env}".format(env=VENV_DIR))

    if not lang.startswith("python3"):
        local("{lang} -m pip install virtualenv --no-cache-dir".format(lang=lang))
        local(
            "{lang} -m virtualenv {env} --no-cache-dir".format(lang=lang, env=VENV_DIR)
        )
    else:
        local("{lang} -m venv {env}".format(lang=lang, env=VENV_DIR))

    # Run in this order inside the new environment.
    setup_commands = (
        "pip install --upgrade setuptools --no-cache-dir",
        "pip install pytest --no-cache-dir",
        "pip install wheel --no-cache-dir",
        "pip install -r requirements.txt --no-cache-dir",
        "pip install pex --no-cache-dir",
    )
    with virtualenv(VENV_DIR) as run_in_venv:
        print(run_in_venv("python --version", capture=True))
        for command in setup_commands:
            run_in_venv(command)
2015-01-03 13:02:21 +03:00
def install():
    """Install the source distribution(s) found in dist/ into the virtualenv."""
    install_cmd = "pip install dist/*.tar.gz"
    with virtualenv(VENV_DIR) as run_in_venv:
        run_in_venv(install_cmd)
2015-01-03 13:02:21 +03:00
2014-07-05 22:49:34 +04:00
def make():
    """Build the extension modules in place from the fabfile's directory.

    The virtualenv at VENV_DIR is activated first, so an environment selected
    through the VENV_DIR environment variable is honoured here in the same way
    as in the other tasks (previously ".env" was hard-coded).
    """
    build_cmd = (
        "export PYTHONPATH=`pwd` && source {env}/bin/activate && "
        "python setup.py build_ext --inplace"
    ).format(env=VENV_DIR)
    with lcd(path.dirname(__file__)):
        local(build_cmd, shell="/bin/bash")
2015-01-03 13:02:21 +03:00
2017-11-07 14:11:08 +03:00
def sdist():
    """Build a source distribution from the fabfile's directory.

    setuptools and srsly are upgraded in the virtualenv before the build runs.
    """
    project_dir = path.dirname(__file__)
    with virtualenv(VENV_DIR) as run_in_venv, lcd(project_dir):
        run_in_venv("python -m pip install -U setuptools srsly")
        run_in_venv("python setup.py sdist")
2019-09-29 18:32:35 +03:00
2015-01-03 13:02:21 +03:00
def wheel():
    """Build a wheel from the fabfile's directory using the virtualenv."""
    project_dir = path.dirname(__file__)
    with virtualenv(VENV_DIR) as run_in_venv, lcd(project_dir):
        run_in_venv("python setup.py bdist_wheel")
def pex():
    """Bundle the wheel(s) in dist/ into a .pex named after the current commit."""
    project_dir = path.dirname(__file__)
    with virtualenv(VENV_DIR) as run_in_venv, lcd(project_dir):
        # The short commit hash becomes part of the output file name.
        sha = local("git rev-parse --short HEAD", capture=True)
        pex_cmd = f"pex dist/*.whl -e spacy -o dist/spacy-{sha}.pex"
        run_in_venv(pex_cmd, direct=True)
2015-01-03 13:02:21 +03:00
def clean():
    """Delete built wheels and pex files, then run `setup.py clean --all`."""
    project_dir = path.dirname(__file__)
    with lcd(project_dir):
        for removal_cmd in ("rm -f dist/*.whl", "rm -f dist/*.pex"):
            local(removal_cmd)
        with virtualenv(VENV_DIR) as run_in_venv:
            run_in_venv("python setup.py clean --all")
2015-01-03 13:02:21 +03:00
2014-07-05 22:49:34 +04:00
2015-01-03 13:02:21 +03:00
def test():
    """Run the spacy/tests suite with pytest, stopping at the first failure."""
    project_dir = path.dirname(__file__)
    with virtualenv(VENV_DIR) as run_in_venv, lcd(project_dir):
        run_in_venv("pytest -x spacy/tests")
def train():
    """Run `spacy train` in the virtualenv.

    Command-line arguments are read from the SPACY_TRAIN_ARGS environment
    variable; when it is unset, no arguments are passed.
    """
    train_args = environ.get("SPACY_TRAIN_ARGS", "")
    train_cmd = "spacy train {args}".format(args=train_args)
    with virtualenv(VENV_DIR) as run_in_venv:
        run_in_venv(train_cmd)
2018-05-01 18:51:22 +03:00
2019-09-29 18:32:35 +03:00
def conll17(treebank_dir, experiment_dir, vectors_dir, config, corpus=""):
    """Train and evaluate `spacy ud-train` models for one or more treebanks.

    The task refuses to run on a dirty git checkout. Results are written to a
    sub-directory of `experiment_dir` named after the first six characters of
    the `sha256sum` output for `config` and the short git commit hash, and the
    config is copied into it as config.json.

    treebank_dir: Directory holding the treebanks; it must contain a
        "ud-test-v2.0-conll2017" sub-directory.
    experiment_dir: Parent directory for the experiment output.
    vectors_dir: Passed to `spacy ud-train` via `-v`.
    config: Path to the config file.
    corpus: Single corpus to run. If empty, a default set of four corpora
        (English, Chinese, Japanese, Vietnamese) is used.
    """
    is_not_clean = local("git status --porcelain", capture=True)
    if is_not_clean:
        print("Repository is not clean")
        print(is_not_clean)
        sys.exit(1)
    git_sha = local("git rev-parse --short HEAD", capture=True)
    config_checksum = local("sha256sum {config}".format(config=config), capture=True)
    experiment_dir = Path(experiment_dir) / "{}--{}".format(
        config_checksum[:6], git_sha
    )
    # Create missing parent directories too, and tolerate an existing directory.
    experiment_dir.mkdir(parents=True, exist_ok=True)
    test_data_dir = Path(treebank_dir) / "ud-test-v2.0-conll2017"
    # Explicit check instead of `assert`, which is stripped under `python -O`;
    # is_dir() is also False when the path does not exist.
    if not test_data_dir.is_dir():
        print("Test data directory not found: {}".format(test_data_dir))
        sys.exit(1)
    if corpus:
        corpora = [corpus]
    else:
        corpora = ["UD_English", "UD_Chinese", "UD_Japanese", "UD_Vietnamese"]

    local(
        "cp {config} {experiment_dir}/config.json".format(
            config=config, experiment_dir=experiment_dir
        )
    )
    with virtualenv(VENV_DIR) as venv_local:
        # Separate loop name so the `corpus` parameter is not shadowed.
        for corpus_name in corpora:
            venv_local(
                "spacy ud-train {treebank_dir} {experiment_dir} {config} {corpus} -v {vectors_dir}".format(
                    treebank_dir=treebank_dir,
                    experiment_dir=experiment_dir,
                    config=config,
                    corpus=corpus_name,
                    vectors_dir=vectors_dir,
                )
            )
            venv_local(
                "spacy ud-run-test {test_data_dir} {experiment_dir} {corpus}".format(
                    test_data_dir=test_data_dir,
                    experiment_dir=experiment_dir,
                    corpus=corpus_name,
                )
            )