add lang registration facility

This commit is contained in:
Henning Peters 2016-03-25 18:54:45 +01:00
parent 963570aa49
commit b8f63071eb
6 changed files with 43 additions and 34 deletions

View File

@ -1,13 +1,16 @@
from . import util from . import util
from .about import __models__
import importlib from .en import English
from .de import German
from . import util
util.register_lang(English.lang, English)
util.register_lang(German.lang, German)
def load(name, vectors=None, via=None): def load(name, vectors=None, via=None):
if name not in __models__: package = util.get_package_by_name(name, via=via)
raise Exception('Model %s not found.' % name) vectors_package = util.get_package_by_name(vectors, via=via)
cls = util.get_lang(name)
mod = importlib.import_module('.%s' % __models__[name]['module'], 'spacy') return cls(package=package, vectors_package=vectors_package)
return getattr(mod, __models__[name]['class'])(
package=util.get_package_by_name(name, via=via),
vectors_package=util.get_package_by_name(vectors, via=via))

View File

@ -11,15 +11,7 @@ __author__ = 'Matthew Honnibal'
__email__ = 'matt@spacy.io' __email__ = 'matt@spacy.io'
__license__ = 'MIT' __license__ = 'MIT'
__models__ = { __models__ = {
'en': { 'en': 'en>=1.0.0,<1.1.0',
'module': 'en', 'de': 'de>=1.0.0,<1.1.0',
'class': 'English',
'package': 'en>=1.0.0,<1.1.0',
},
'de': {
'module': 'de',
'class': 'German',
'package': 'de>=1.0.0,<1.1.0',
},
} }
__default_model__ = 'en' __default_lang__ = 'en'

View File

@ -14,17 +14,17 @@ def download(lang, force=False):
sputnik.purge(about.__title__, about.__version__) sputnik.purge(about.__title__, about.__version__)
try: try:
sputnik.package(about.__title__, about.__version__, about.__models__[lang]['package']) sputnik.package(about.__title__, about.__version__, about.__models__[lang])
print("Model already installed. Please run 'python -m " print("Model already installed. Please run 'python -m "
"spacy.%s.download --force' to reinstall." % lang, file=sys.stderr) "spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
sys.exit(1) sys.exit(1)
except (PackageNotFoundException, CompatiblePackageNotFoundException): except (PackageNotFoundException, CompatiblePackageNotFoundException):
pass pass
package = sputnik.install(about.__title__, about.__version__, about.__models__[lang]['package']) package = sputnik.install(about.__title__, about.__version__, about.__models__[lang])
try: try:
sputnik.package(about.__title__, about.__version__, about.__models__[lang]['package']) sputnik.package(about.__title__, about.__version__, about.__models__[lang])
except (PackageNotFoundException, CompatiblePackageNotFoundException): except (PackageNotFoundException, CompatiblePackageNotFoundException):
print("Model failed to install. Please run 'python -m " print("Model failed to install. Please run 'python -m "
"spacy.%s.download --force'." % lang, file=sys.stderr) "spacy.%s.download --force'." % lang, file=sys.stderr)

View File

@ -16,8 +16,7 @@ cimport cython
from . import util from . import util
from .tokens.doc cimport Doc from .tokens.doc cimport Doc
from .util import read_lang_data from .util import read_lang_data, get_package
from .util import get_package
cdef class Tokenizer: cdef class Tokenizer:

View File

@ -14,6 +14,21 @@ from . import about
from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
# Registry mapping a language identifier (e.g. 'en') to the class that
# implements that language. Filled through register_lang().
LANGUAGES = {}


def register_lang(name, cls):
    """Register `cls` as the implementation of the language `name`.

    Registering a second class under the same name replaces the first.
    """
    # Item assignment mutates the module-level dict in place, so no
    # `global` declaration is required.
    LANGUAGES[name] = cls
def get_lang(name):
    """Return the class registered for the language named by `name`.

    `name` may be a bare language id ('en') or a package requirement
    string such as 'en>=1.0.0,<1.1.0'. Only the leading run of letters,
    digits and underscores is used as the lookup key.

    Raises RuntimeError if no class is registered under that key.
    """
    # Cut at the first character that cannot be part of a language id.
    # `maxsplit` is passed by keyword: the positional form is deprecated
    # in recent Python versions.
    lang = re.split(r'[^a-zA-Z0-9_]', name, maxsplit=1)[0]
    if lang not in LANGUAGES:
        raise RuntimeError('Language not supported: %s' % lang)
    return LANGUAGES[lang]
def get_package(data_dir): def get_package(data_dir):
if not isinstance(data_dir, six.string_types): if not isinstance(data_dir, six.string_types):
raise RuntimeError('data_dir must be a string') raise RuntimeError('data_dir must be a string')
@ -21,19 +36,20 @@ def get_package(data_dir):
def get_package_by_name(name=None, via=None): def get_package_by_name(name=None, via=None):
package_name = name or about.__models__[about.__default_lang__]
lang = get_lang(package_name)
try: try:
return sputnik.package(about.__title__, about.__version__, return sputnik.package(about.__title__, about.__version__,
name or about.__models__[about.__default_model__]['package'], package_name, data_path=via)
data_path=via)
except PackageNotFoundException as e: except PackageNotFoundException as e:
raise RuntimeError("Model %s not installed. Please run 'python -m " raise RuntimeError("Model '%s' not installed. Please run 'python -m "
"spacy.%s.download' to install latest compatible " "%s.download' to install latest compatible "
"model." % (name, about.__models__[name]['module'])) "model." % (name, lang.__module__))
except CompatiblePackageNotFoundException as e: except CompatiblePackageNotFoundException as e:
raise RuntimeError("Installed model %s is not compatible with spaCy " raise RuntimeError("Installed model is not compatible with spaCy "
"version. Please run 'python -m spacy.%s.download " "version. Please run 'python -m %s.download "
"--force' to install latest compatible model." % "--force' to install latest compatible model." %
(name, about.__models__[name]['module'])) (lang.__module__))
def normalize_slice(length, start, stop, step=None): def normalize_slice(length, start, stop, step=None):

View File

@ -25,7 +25,6 @@ from . import attrs
from . import symbols from . import symbols
from cymem.cymem cimport Address from cymem.cymem cimport Address
from . import util
from .serialize.packer cimport Packer from .serialize.packer cimport Packer
from .attrs cimport PROB from .attrs cimport PROB