Mirror of https://github.com/explosion/spaCy.git (synced 2024-12-24 00:46:28 +03:00)
Add compat functions and remove old workarounds
Add ensure_path util function to handle checking instance of path
This commit is contained in:
parent 26445ee304
commit c05ec4b89a
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import pip
 from pathlib import Path
 import importlib
+from ..compat import unicode_, symlink_to
 from .. import util


@@ -43,23 +44,17 @@ def symlink(model_path, link_name, force):
     elif link_path.exists():
         link_path.unlink()

-    # Add workaround for Python 2 on Windows (see issue #909)
-    if util.is_python2() and util.is_windows():
-        import subprocess
-        command = ['mklink', '/d', unicode(link_path), unicode(model_path)]
-        try:
-            subprocess.call(command, shell=True)
-        except:
-            # This is quite dirty, but just making sure other Windows-specific
-            # errors are caught so users at least see a proper error message.
-            util.sys_exit(
-                "Creating a symlink in spacy/data failed. You can still import "
-                "the model as a Python package and call its load() method, or "
-                "create the symlink manually:",
-                "{a} --> {b}".format(a=unicode(model_path), b=unicode(link_path)),
-                title="Error: Couldn't link model to '{l}'".format(l=link_name))
-    else:
-        link_path.symlink_to(model_path)
+    try:
+        symlink_to(link_path, model_path)
+    except:
+        # This is quite dirty, but just making sure other errors are caught so
+        # users at least see a proper message.
+        util.sys_exit(
+            "Creating a symlink in spacy/data failed. You can still import "
+            "the model as a Python package and call its load() method, or "
+            "create the symlink manually:",
+            "{a} --> {b}".format(a=unicode_(model_path), b=unicode_(link_path)),
+            title="Error: Couldn't link model to '{l}'".format(l=link_name))

     util.print_msg(
         "{a} --> {b}".format(a=model_path.as_posix(), b=link_path.as_posix()),
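The `..compat` module referenced above is not part of this diff, so the exact definitions of `unicode_` and `symlink_to` are not visible here. A minimal sketch of what they plausibly look like, assuming the removed Python 2 / Windows `mklink` workaround (issue #909) simply moved into compat:

```python
# Hypothetical sketch of the compat helpers used above -- names and behaviour
# are assumptions inferred from the removed workaround, not from this diff.
import subprocess
import sys

is_windows = sys.platform.startswith('win')
is_python2 = sys.version_info[0] == 2

if is_python2:
    unicode_ = unicode  # noqa: F821 -- builtin only exists on Python 2
else:
    unicode_ = str


def symlink_to(orig, dest):
    # Python 2 on Windows cannot create directory symlinks through pathlib,
    # so shell out to `mklink /d`, as the removed CLI workaround did.
    if is_python2 and is_windows:
        subprocess.call(['mklink', '/d', unicode_(orig), unicode_(dest)],
                        shell=True)
    else:
        orig.symlink_to(dest)
```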
@@ -1,20 +1,13 @@
 # coding: utf8
 from __future__ import unicode_literals

 import json
 import shutil
 import requests
 from pathlib import Path

-import six
-
 from .. import about
+from ..compat import unicode_, json_dumps
 from .. import util

-if six.PY2:
-    json_dumps = lambda data: json.dumps(data, indent=2).decode("utf8")
-elif six.PY3:
-    json_dumps = lambda data: json.dumps(data, indent=2)
-

 def package(input_dir, output_dir, force):
     input_path = Path(input_dir)
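The removed `six.PY2` / `six.PY3` branching shows what the `json_dumps` now imported from `..compat` presumably does: serialize to indented JSON and always return text. A sketch under that assumption, without the `six` dependency:

```python
# Hypothetical sketch of compat.json_dumps -- inferred from the removed
# lambdas above, not taken from this diff.
import json
import sys


def json_dumps(data):
    """Serialize `data` to indented JSON, returning text on Python 2 and 3."""
    result = json.dumps(data, indent=2)
    if sys.version_info[0] == 2:
        # json.dumps returns a byte string on Python 2; decode it to unicode.
        result = result.decode('utf8')
    return result
```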
@@ -32,31 +25,31 @@ def package(input_dir, output_dir, force):
     package_path = main_path / model_name

     create_dirs(package_path, force)
-    shutil.copytree(input_path.as_posix(), (package_path / model_name_v).as_posix())
+    shutil.copytree(unicode_(input_path), unicode_(package_path / model_name_v))
     create_file(main_path / 'meta.json', json_dumps(meta))
     create_file(main_path / 'setup.py', template_setup)
     create_file(main_path / 'MANIFEST.in', template_manifest)
     create_file(package_path / '__init__.py', template_init)

     util.print_msg(
-        main_path.as_posix(),
+        unicode_(main_path),
         "To build the package, run `python setup.py sdist` in that directory.",
         title="Successfully created package {p}".format(p=model_name_v))


 def check_dirs(input_path, output_path):
     if not input_path.exists():
-        util.sys_exit(input_path.as_poisx(), title="Model directory not found")
+        util.sys_exit(unicode_(input_path.as_poisx), title="Model directory not found")
     if not output_path.exists():
-        util.sys_exit(output_path.as_posix(), title="Output directory not found")
+        util.sys_exit(unicode_(output_path), title="Output directory not found")


 def create_dirs(package_path, force):
     if package_path.exists():
         if force:
-            shutil.rmtree(package_path.as_posix())
+            shutil.rmtree(unicode_(package_path.as_posix))
         else:
-            util.sys_exit(package_path.as_posix(),
+            util.sys_exit(unicode_(package_path.as_posix),
                 "Please delete the directory and try again.",
                 title="Package directory already exists")
     Path.mkdir(package_path, parents=True)
@@ -6,12 +6,6 @@ from .cli import download
 from .cli import link


-try:
-    basestring
-except NameError:
-    basestring = str
-
-
 def read_lang_data(package):
     tokenization = package.load_json(('tokenizer', 'specials.json'))
     with package.open(('tokenizer', 'prefix.txt'), default=None) as file_:
@@ -73,9 +67,7 @@ def fix_glove_vectors_loading(overrides):
     if overrides.get('path') in (None, True):
         data_path = util.get_data_path()
     else:
-        path = overrides['path']
-        if isinstance(path, basestring):
-            path = Path(path)
+        path = util.ensure_path(overrides['path'])
         data_path = path.parent
     vec_path = None
     if 'add_vectors' not in overrides:
@@ -4,17 +4,6 @@ from contextlib import contextmanager
 import shutil
 import ujson

-
-try:
-    basestring
-except NameError:
-    basestring = str
-
-try:
-    unicode
-except NameError:
-    unicode = str
-
 from .tokenizer import Tokenizer
 from .vocab import Vocab
 from .tagger import Tagger
@@ -26,6 +15,7 @@ from .syntax.nonproj import PseudoProjectivity
 from .pipeline import DependencyParser, EntityRecognizer
 from .syntax.arc_eager import ArcEager
 from .syntax.ner import BiluoPushDown
+from .compat import unicode_
 from .attrs import IS_STOP
 from . import attrs
 from . import orth
@@ -205,7 +195,7 @@ class Language(object):
         directory.mkdir()
         with (directory / 'config.json').open('wb') as file_:
             data = ujson.dumps(config, indent=2)
-            if isinstance(data, unicode):
+            if isinstance(data, unicode_):
                 data = data.encode('utf8')
             file_.write(data)
         if not (path / 'vocab').exists():
@@ -252,9 +242,7 @@ class Language(object):
     def __init__(self, **overrides):
         if 'data_dir' in overrides and 'path' not in overrides:
             raise ValueError("The argument 'data_dir' has been renamed to 'path'")
-        path = overrides.get('path', True)
-        if isinstance(path, basestring):
-            path = pathlib.Path(path)
+        path = util.ensure_path(overrides.get('path', True))
         if path is True:
             path = util.get_data_path() / self.lang
         if not path.exists() and 'path' not in overrides:
@@ -15,6 +15,7 @@ from .tokens.doc cimport Doc
 from .attrs cimport TAG
 from .gold cimport GoldParse
 from .attrs cimport *
+from . import util


 cpdef enum:
@@ -127,7 +128,7 @@ cdef class Tagger:
         """
         # TODO: Change this to expect config.json when we don't have to
        # support old data.
-        path = path if not isinstance(path, basestring) else pathlib.Path(path)
+        path = util.ensure_path(path)
         if (path / 'templates.json').exists():
             with (path / 'templates.json').open('r', encoding='utf8') as file_:
                 templates = json.load(file_)
@@ -48,10 +48,8 @@ cdef class Tokenizer:
         infix_finditer:
             Signature of re.compile(string).finditer
         Returns Tokenizer
-        if isinstance(path, basestring):
-            path = pathlib.Path(path)
-
         """
+        path = util.ensure_path(path)
         if rules is None:
             with (path / 'tokenizer' / 'specials.json').open('r', encoding='utf8') as file_:
                 rules = json.load(file_)
@@ -8,17 +8,7 @@ from pathlib import Path
 import sys
 import textwrap

-try:
-    basestring
-except NameError:
-    basestring = str
-
-
-try:
-    raw_input
-except NameError: # Python 3
-    raw_input = input
-
+from .compat import basestring_, unicode_, input_


 LANGUAGES = {}
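As in the other files, the try/except name shims are replaced by a single import from `.compat`. Based on the removed fallbacks, the compat aliases presumably reduce to something like the following sketch (an assumption; the compat module itself is not shown in this commit):

```python
# Hypothetical sketch of the compat aliases -- not part of this diff.
import sys

if sys.version_info[0] == 2:
    basestring_ = basestring  # noqa: F821 -- Python 2 builtin
    unicode_ = unicode        # noqa: F821 -- Python 2 builtin
    input_ = raw_input        # noqa: F821 -- Python 2 builtin
else:
    basestring_ = str
    unicode_ = str
    input_ = input
```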
@@ -46,9 +36,14 @@ def get_data_path(require_exists=True):

 def set_data_path(path):
     global _data_path
-    if isinstance(path, basestring):
-        path = pathlib.Path(path)
-    _data_path = path
+    _data_path = ensure_path(path)


+def ensure_path(path):
+    if isinstance(path, basestring_):
+        return Path(path)
+    else:
+        return path
+
+
 def or_(val1, val2):
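Because `ensure_path` only converts string types and returns every other value unchanged, callers can keep passing `Path` objects or sentinel values such as `True` and `None`; that is why checks like `path is True` in `Language.__init__` still work after the substitution. A short illustration (the `from spacy import util` location is assumed, since the diff does not name files):

```python
from pathlib import Path

from spacy import util  # assumed module path; the diff does not show file names

# Strings become Path objects; Path instances and sentinels pass through.
assert util.ensure_path('models/en') == Path('models/en')
assert util.ensure_path(Path('models/en')) == Path('models/en')
assert util.ensure_path(True) is True
assert util.ensure_path(None) is None
```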
@@ -94,7 +89,7 @@ def constraint_match(constraint_string, version):


 def read_regex(path):
-    path = path if not isinstance(path, basestring) else pathlib.Path(path)
+    path = ensure_path(path)
     with path.open() as file_:
         entries = file_.read().split('\n')
         expression = '|'.join(['^' + re.escape(piece) for piece in entries if piece.strip()])
@@ -151,16 +146,6 @@ def check_renamed_kwargs(renamed, kwargs):
             raise TypeError("Keyword argument %s now renamed to %s" % (old, new))


-def is_windows():
-    """Check if user is on Windows."""
-    return sys.platform.startswith('win')
-
-
-def is_python2():
-    """Check if Python 2 is used."""
-    return sys.version.startswith('2.')
-
-
 def parse_package_meta(package_path, package, require=True):
     location = package_path / package / 'meta.json'
     if location.is_file():
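`is_windows()` and `is_python2()` are deleted here, and their only call site visible in this diff (the Python 2 / Windows symlink workaround in the CLI) is removed as well. If the checks survive at all, a plausible home is the new compat module, for example as module-level flags like those used in the sketch after the CLI hunk above (an assumption, not shown in this commit):

```python
# Hypothetical compat-style flags replacing the removed helper functions.
import sys

is_windows = sys.platform.startswith('win')
is_python2 = sys.version_info[0] == 2
```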
@@ -180,7 +165,7 @@ def get_raw_input(description, default=False):

     additional = ' (default: {d})'.format(d=default) if default else ''
     prompt = ' {d}{a}: '.format(d=description, a=additional)
-    user_input = raw_input(prompt)
+    user_input = input_(prompt)
     return user_input

@@ -5,11 +5,6 @@ import bz2
 import ujson as json
 import re

-try:
-    import cPickle as pickle
-except ImportError:
-    import pickle
-
 from libc.string cimport memset
 from libc.stdint cimport int32_t
 from libc.math cimport sqrt
@@ -23,10 +18,7 @@ from .tokens.token cimport Token
 from .serialize.packer cimport Packer
 from .attrs cimport PROB, LANG

-try:
-    import copy_reg
-except ImportError:
-    import copyreg as copy_reg
+from .compat import copy_reg, pickle
 from .lemmatizer import Lemmatizer
 from .attrs import intify_attrs
 from . import util
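The removed try/except imports indicate what the `copy_reg` and `pickle` names now imported from `.compat` most likely alias to; a sketch under that assumption:

```python
# Hypothetical sketch of the compat import aliases -- inferred from the
# removed try/except blocks, not shown in this diff.
import sys

if sys.version_info[0] == 2:
    import cPickle as pickle      # faster C implementation on Python 2
    import copy_reg
else:
    import pickle                 # Python 3 merged cPickle into pickle
    import copyreg as copy_reg
```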
@@ -69,8 +61,7 @@ cdef class Vocab:
         Returns:
             Vocab: The newly constructed vocab object.
         """
-        if isinstance(path, basestring):
-            path = Path(path)
+        path = util.ensure_path(path)
         util.check_renamed_kwargs({'get_lex_attr': 'lex_attr_getters'}, deprecated_kwargs)
         if 'vectors' in deprecated_kwargs:
             raise AttributeError(