* Add MockPackage class, to see whether we can proxy for Sputnik in a lightweight way

This commit is contained in:
Matthew Honnibal 2015-12-29 16:55:03 +01:00
parent f5dea1406d
commit 4131e45543

View File

@ -2,23 +2,74 @@ import os
import io import io
import json import json
import re import re
import os.path
from sputnik import Sputnik from contextlib import contextmanager
from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
def get_package(name=None, data_path=None): def local_path(subdir):
if data_path is None: return os.path.abspath(os.path.join(os.path.dirname(__file__), 'data'))
if os.environ.get('SPACY_DATA'):
data_path = os.environ.get('SPACY_DATA')
else:
data_path = os.path.abspath(
os.path.join(os.path.dirname(__file__), 'data'))
sputnik = Sputnik('spacy', '0.100.0') # TODO: retrieve version
pool = sputnik.pool(data_path) class MockPackage(object):
return pool.get(name or 'en_default') @classmethod
def create_or_return(cls, me_or_arg):
return me_or_arg if isinstance(me_or_arg, cls) else me_or_arg
def __init__(self, data_path=None):
if data_path is None:
data_path = local_path('data')
self.name = None
self.data_path = data_path
self._root = self.data_path
def get(self, key):
pass
def has_file(self, *path_parts):
return os.path.exists(os.path.join(self._root, *path_parts))
def file_path(self, *path_parts, **kwargs):
return os.path.join(self._root, *path_parts)
def dir_path(self, *path_parts, **kwargs):
return os.path.join(self._root, *path_parts)
def load_utf8(self, func, *path_parts, **kwargs):
if kwargs.get('require', True):
with io.open(self.file_path(os.path.join(*path_parts)),
mode='r', encoding='utf8') as f:
return func(f)
else:
return None
@contextmanager
def open(self, path_parts, default=IOError):
if isinstance(default, Exception):
raise default
# Enter
file_ = io.open(self.file_path(os.path.join(*path_parts)),
mode='r', encoding='utf8')
yield file_
# Exit
file_.close()
def get_package(name=None, data_path=None):
return MockPackage(data_path)
#if data_path is None:
# if os.environ.get('SPACY_DATA'):
# data_path = os.environ.get('SPACY_DATA')
# else:
# data_path = os.path.abspath(
# os.path.join(os.path.dirname(__file__), 'data'))
#sputnik = Sputnik('spacy', '0.100.0') # TODO: retrieve version
#pool = sputnik.pool(data_path)
#return pool.get(name or 'en_default')
def normalize_slice(length, start, stop, step=None): def normalize_slice(length, start, stop, step=None):