Add compat functions and remove old workarounds

Add ensure_path util function that converts a string path to a Path object and returns any other value unchanged
This commit is contained in:
ines 2017-04-15 12:11:16 +02:00
parent 26445ee304
commit c05ec4b89a
8 changed files with 39 additions and 96 deletions

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals
import pip import pip
from pathlib import Path from pathlib import Path
import importlib import importlib
from ..compat import unicode_, symlink_to
from .. import util from .. import util
@ -43,23 +44,17 @@ def symlink(model_path, link_name, force):
elif link_path.exists(): elif link_path.exists():
link_path.unlink() link_path.unlink()
# Add workaround for Python 2 on Windows (see issue #909)
if util.is_python2() and util.is_windows():
import subprocess
command = ['mklink', '/d', unicode(link_path), unicode(model_path)]
try: try:
subprocess.call(command, shell=True) symlink_to(link_path, model_path)
except: except:
# This is quite dirty, but just making sure other Windows-specific # This is quite dirty, but just making sure other errors are caught so
# errors are caught so users at least see a proper error message. # users at least see a proper message.
util.sys_exit( util.sys_exit(
"Creating a symlink in spacy/data failed. You can still import " "Creating a symlink in spacy/data failed. You can still import "
"the model as a Python package and call its load() method, or " "the model as a Python package and call its load() method, or "
"create the symlink manually:", "create the symlink manually:",
"{a} --> {b}".format(a=unicode(model_path), b=unicode(link_path)), "{a} --> {b}".format(a=unicode_(model_path), b=unicode_(link_path)),
title="Error: Couldn't link model to '{l}'".format(l=link_name)) title="Error: Couldn't link model to '{l}'".format(l=link_name))
else:
link_path.symlink_to(model_path)
util.print_msg( util.print_msg(
"{a} --> {b}".format(a=model_path.as_posix(), b=link_path.as_posix()), "{a} --> {b}".format(a=model_path.as_posix(), b=link_path.as_posix()),

View File

@ -1,20 +1,13 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import shutil import shutil
import requests import requests
from pathlib import Path from pathlib import Path
import six from ..compat import unicode_, json_dumps
from .. import about
from .. import util from .. import util
if six.PY2:
json_dumps = lambda data: json.dumps(data, indent=2).decode("utf8")
elif six.PY3:
json_dumps = lambda data: json.dumps(data, indent=2)
def package(input_dir, output_dir, force): def package(input_dir, output_dir, force):
input_path = Path(input_dir) input_path = Path(input_dir)
@ -32,31 +25,31 @@ def package(input_dir, output_dir, force):
package_path = main_path / model_name package_path = main_path / model_name
create_dirs(package_path, force) create_dirs(package_path, force)
shutil.copytree(input_path.as_posix(), (package_path / model_name_v).as_posix()) shutil.copytree(unicode_(input_path), unicode_(package_path / model_name_v))
create_file(main_path / 'meta.json', json_dumps(meta)) create_file(main_path / 'meta.json', json_dumps(meta))
create_file(main_path / 'setup.py', template_setup) create_file(main_path / 'setup.py', template_setup)
create_file(main_path / 'MANIFEST.in', template_manifest) create_file(main_path / 'MANIFEST.in', template_manifest)
create_file(package_path / '__init__.py', template_init) create_file(package_path / '__init__.py', template_init)
util.print_msg( util.print_msg(
main_path.as_posix(), unicode_(main_path),
"To build the package, run `python setup.py sdist` in that directory.", "To build the package, run `python setup.py sdist` in that directory.",
title="Successfully created package {p}".format(p=model_name_v)) title="Successfully created package {p}".format(p=model_name_v))
def check_dirs(input_path, output_path): def check_dirs(input_path, output_path):
if not input_path.exists(): if not input_path.exists():
util.sys_exit(input_path.as_poisx(), title="Model directory not found") util.sys_exit(unicode_(input_path.as_poisx), title="Model directory not found")
if not output_path.exists(): if not output_path.exists():
util.sys_exit(output_path.as_posix(), title="Output directory not found") util.sys_exit(unicode_(output_path), title="Output directory not found")
def create_dirs(package_path, force): def create_dirs(package_path, force):
if package_path.exists(): if package_path.exists():
if force: if force:
shutil.rmtree(package_path.as_posix()) shutil.rmtree(unicode_(package_path.as_posix))
else: else:
util.sys_exit(package_path.as_posix(), util.sys_exit(unicode_(package_path.as_posix),
"Please delete the directory and try again.", "Please delete the directory and try again.",
title="Package directory already exists") title="Package directory already exists")
Path.mkdir(package_path, parents=True) Path.mkdir(package_path, parents=True)

View File

@ -6,12 +6,6 @@ from .cli import download
from .cli import link from .cli import link
try:
basestring
except NameError:
basestring = str
def read_lang_data(package): def read_lang_data(package):
tokenization = package.load_json(('tokenizer', 'specials.json')) tokenization = package.load_json(('tokenizer', 'specials.json'))
with package.open(('tokenizer', 'prefix.txt'), default=None) as file_: with package.open(('tokenizer', 'prefix.txt'), default=None) as file_:
@ -73,9 +67,7 @@ def fix_glove_vectors_loading(overrides):
if overrides.get('path') in (None, True): if overrides.get('path') in (None, True):
data_path = util.get_data_path() data_path = util.get_data_path()
else: else:
path = overrides['path'] path = util.ensure_path(overrides['path'])
if isinstance(path, basestring):
path = Path(path)
data_path = path.parent data_path = path.parent
vec_path = None vec_path = None
if 'add_vectors' not in overrides: if 'add_vectors' not in overrides:

View File

@ -4,17 +4,6 @@ from contextlib import contextmanager
import shutil import shutil
import ujson import ujson
try:
basestring
except NameError:
basestring = str
try:
unicode
except NameError:
unicode = str
from .tokenizer import Tokenizer from .tokenizer import Tokenizer
from .vocab import Vocab from .vocab import Vocab
from .tagger import Tagger from .tagger import Tagger
@ -26,6 +15,7 @@ from .syntax.nonproj import PseudoProjectivity
from .pipeline import DependencyParser, EntityRecognizer from .pipeline import DependencyParser, EntityRecognizer
from .syntax.arc_eager import ArcEager from .syntax.arc_eager import ArcEager
from .syntax.ner import BiluoPushDown from .syntax.ner import BiluoPushDown
from .compat import unicode_
from .attrs import IS_STOP from .attrs import IS_STOP
from . import attrs from . import attrs
from . import orth from . import orth
@ -205,7 +195,7 @@ class Language(object):
directory.mkdir() directory.mkdir()
with (directory / 'config.json').open('wb') as file_: with (directory / 'config.json').open('wb') as file_:
data = ujson.dumps(config, indent=2) data = ujson.dumps(config, indent=2)
if isinstance(data, unicode): if isinstance(data, unicode_):
data = data.encode('utf8') data = data.encode('utf8')
file_.write(data) file_.write(data)
if not (path / 'vocab').exists(): if not (path / 'vocab').exists():
@ -252,9 +242,7 @@ class Language(object):
def __init__(self, **overrides): def __init__(self, **overrides):
if 'data_dir' in overrides and 'path' not in overrides: if 'data_dir' in overrides and 'path' not in overrides:
raise ValueError("The argument 'data_dir' has been renamed to 'path'") raise ValueError("The argument 'data_dir' has been renamed to 'path'")
path = overrides.get('path', True) path = util.ensure_path(overrides.get('path', True))
if isinstance(path, basestring):
path = pathlib.Path(path)
if path is True: if path is True:
path = util.get_data_path() / self.lang path = util.get_data_path() / self.lang
if not path.exists() and 'path' not in overrides: if not path.exists() and 'path' not in overrides:

View File

@ -15,6 +15,7 @@ from .tokens.doc cimport Doc
from .attrs cimport TAG from .attrs cimport TAG
from .gold cimport GoldParse from .gold cimport GoldParse
from .attrs cimport * from .attrs cimport *
from . import util
cpdef enum: cpdef enum:
@ -127,7 +128,7 @@ cdef class Tagger:
""" """
# TODO: Change this to expect config.json when we don't have to # TODO: Change this to expect config.json when we don't have to
# support old data. # support old data.
path = path if not isinstance(path, basestring) else pathlib.Path(path) path = util.ensure_path(path)
if (path / 'templates.json').exists(): if (path / 'templates.json').exists():
with (path / 'templates.json').open('r', encoding='utf8') as file_: with (path / 'templates.json').open('r', encoding='utf8') as file_:
templates = json.load(file_) templates = json.load(file_)

View File

@ -48,10 +48,8 @@ cdef class Tokenizer:
infix_finditer: infix_finditer:
Signature of re.compile(string).finditer Signature of re.compile(string).finditer
Returns Tokenizer Returns Tokenizer
if isinstance(path, basestring):
path = pathlib.Path(path)
""" """
path = util.ensure_path(path)
if rules is None: if rules is None:
with (path / 'tokenizer' / 'specials.json').open('r', encoding='utf8') as file_: with (path / 'tokenizer' / 'specials.json').open('r', encoding='utf8') as file_:
rules = json.load(file_) rules = json.load(file_)

View File

@ -8,17 +8,7 @@ from pathlib import Path
import sys import sys
import textwrap import textwrap
from .compat import basestring_, unicode_, input_
try:
basestring
except NameError:
basestring = str
try:
raw_input
except NameError: # Python 3
raw_input = input
LANGUAGES = {} LANGUAGES = {}
@ -46,9 +36,14 @@ def get_data_path(require_exists=True):
def set_data_path(path): def set_data_path(path):
global _data_path global _data_path
if isinstance(path, basestring): _data_path = ensure_path(path)
path = pathlib.Path(path)
_data_path = path
def ensure_path(path):
if isinstance(path, basestring_):
return Path(path)
else:
return path
def or_(val1, val2): def or_(val1, val2):
@ -94,7 +89,7 @@ def constraint_match(constraint_string, version):
def read_regex(path): def read_regex(path):
path = path if not isinstance(path, basestring) else pathlib.Path(path) path = ensure_path(path)
with path.open() as file_: with path.open() as file_:
entries = file_.read().split('\n') entries = file_.read().split('\n')
expression = '|'.join(['^' + re.escape(piece) for piece in entries if piece.strip()]) expression = '|'.join(['^' + re.escape(piece) for piece in entries if piece.strip()])
@ -151,16 +146,6 @@ def check_renamed_kwargs(renamed, kwargs):
raise TypeError("Keyword argument %s now renamed to %s" % (old, new)) raise TypeError("Keyword argument %s now renamed to %s" % (old, new))
def is_windows():
"""Check if user is on Windows."""
return sys.platform.startswith('win')
def is_python2():
"""Check if Python 2 is used."""
return sys.version.startswith('2.')
def parse_package_meta(package_path, package, require=True): def parse_package_meta(package_path, package, require=True):
location = package_path / package / 'meta.json' location = package_path / package / 'meta.json'
if location.is_file(): if location.is_file():
@ -180,7 +165,7 @@ def get_raw_input(description, default=False):
additional = ' (default: {d})'.format(d=default) if default else '' additional = ' (default: {d})'.format(d=default) if default else ''
prompt = ' {d}{a}: '.format(d=description, a=additional) prompt = ' {d}{a}: '.format(d=description, a=additional)
user_input = raw_input(prompt) user_input = input_(prompt)
return user_input return user_input

View File

@ -5,11 +5,6 @@ import bz2
import ujson as json import ujson as json
import re import re
try:
import cPickle as pickle
except ImportError:
import pickle
from libc.string cimport memset from libc.string cimport memset
from libc.stdint cimport int32_t from libc.stdint cimport int32_t
from libc.math cimport sqrt from libc.math cimport sqrt
@ -23,10 +18,7 @@ from .tokens.token cimport Token
from .serialize.packer cimport Packer from .serialize.packer cimport Packer
from .attrs cimport PROB, LANG from .attrs cimport PROB, LANG
try: from .compat import copy_reg, pickle
import copy_reg
except ImportError:
import copyreg as copy_reg
from .lemmatizer import Lemmatizer from .lemmatizer import Lemmatizer
from .attrs import intify_attrs from .attrs import intify_attrs
from . import util from . import util
@ -69,8 +61,7 @@ cdef class Vocab:
Returns: Returns:
Vocab: The newly constructed vocab object. Vocab: The newly constructed vocab object.
""" """
if isinstance(path, basestring): path = util.ensure_path(path)
path = Path(path)
util.check_renamed_kwargs({'get_lex_attr': 'lex_attr_getters'}, deprecated_kwargs) util.check_renamed_kwargs({'get_lex_attr': 'lex_attr_getters'}, deprecated_kwargs)
if 'vectors' in deprecated_kwargs: if 'vectors' in deprecated_kwargs:
raise AttributeError( raise AttributeError(