mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
add lang registration facility
This commit is contained in:
parent
963570aa49
commit
b8f63071eb
|
@ -1,13 +1,16 @@
|
||||||
from . import util
|
from . import util
|
||||||
from .about import __models__
|
|
||||||
import importlib
|
from .en import English
|
||||||
|
from .de import German
|
||||||
|
from . import util
|
||||||
|
|
||||||
|
|
||||||
|
util.register_lang(English.lang, English)
|
||||||
|
util.register_lang(German.lang, German)
|
||||||
|
|
||||||
|
|
||||||
def load(name, vectors=None, via=None):
|
def load(name, vectors=None, via=None):
|
||||||
if name not in __models__:
|
package = util.get_package_by_name(name, via=via)
|
||||||
raise Exception('Model %s not found.' % name)
|
vectors_package = util.get_package_by_name(vectors, via=via)
|
||||||
|
cls = util.get_lang(name)
|
||||||
mod = importlib.import_module('.%s' % __models__[name]['module'], 'spacy')
|
return cls(package=package, vectors_package=vectors_package)
|
||||||
return getattr(mod, __models__[name]['class'])(
|
|
||||||
package=util.get_package_by_name(name, via=via),
|
|
||||||
vectors_package=util.get_package_by_name(vectors, via=via))
|
|
||||||
|
|
|
@ -11,15 +11,7 @@ __author__ = 'Matthew Honnibal'
|
||||||
__email__ = 'matt@spacy.io'
|
__email__ = 'matt@spacy.io'
|
||||||
__license__ = 'MIT'
|
__license__ = 'MIT'
|
||||||
__models__ = {
|
__models__ = {
|
||||||
'en': {
|
'en': 'en>=1.0.0,<1.1.0',
|
||||||
'module': 'en',
|
'de': 'de>=1.0.0,<1.1.0',
|
||||||
'class': 'English',
|
|
||||||
'package': 'en>=1.0.0,<1.1.0',
|
|
||||||
},
|
|
||||||
'de': {
|
|
||||||
'module': 'de',
|
|
||||||
'class': 'German',
|
|
||||||
'package': 'de>=1.0.0,<1.1.0',
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
__default_model__ = 'en'
|
__default_lang__ = 'en'
|
||||||
|
|
|
@ -14,17 +14,17 @@ def download(lang, force=False):
|
||||||
sputnik.purge(about.__title__, about.__version__)
|
sputnik.purge(about.__title__, about.__version__)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sputnik.package(about.__title__, about.__version__, about.__models__[lang]['package'])
|
sputnik.package(about.__title__, about.__version__, about.__models__[lang])
|
||||||
print("Model already installed. Please run 'python -m "
|
print("Model already installed. Please run 'python -m "
|
||||||
"spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
|
"spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
except (PackageNotFoundException, CompatiblePackageNotFoundException):
|
except (PackageNotFoundException, CompatiblePackageNotFoundException):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
package = sputnik.install(about.__title__, about.__version__, about.__models__[lang]['package'])
|
package = sputnik.install(about.__title__, about.__version__, about.__models__[lang])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sputnik.package(about.__title__, about.__version__, about.__models__[lang]['package'])
|
sputnik.package(about.__title__, about.__version__, about.__models__[lang])
|
||||||
except (PackageNotFoundException, CompatiblePackageNotFoundException):
|
except (PackageNotFoundException, CompatiblePackageNotFoundException):
|
||||||
print("Model failed to install. Please run 'python -m "
|
print("Model failed to install. Please run 'python -m "
|
||||||
"spacy.%s.download --force'." % lang, file=sys.stderr)
|
"spacy.%s.download --force'." % lang, file=sys.stderr)
|
||||||
|
|
|
@ -16,8 +16,7 @@ cimport cython
|
||||||
|
|
||||||
from . import util
|
from . import util
|
||||||
from .tokens.doc cimport Doc
|
from .tokens.doc cimport Doc
|
||||||
from .util import read_lang_data
|
from .util import read_lang_data, get_package
|
||||||
from .util import get_package
|
|
||||||
|
|
||||||
|
|
||||||
cdef class Tokenizer:
|
cdef class Tokenizer:
|
||||||
|
|
|
@ -14,6 +14,21 @@ from . import about
|
||||||
from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
|
from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
|
||||||
|
|
||||||
|
|
||||||
|
LANGUAGES = {}
|
||||||
|
|
||||||
|
|
||||||
|
def register_lang(name, cls):
|
||||||
|
global LANGUAGES
|
||||||
|
LANGUAGES[name] = cls
|
||||||
|
|
||||||
|
|
||||||
|
def get_lang(name):
|
||||||
|
lang = re.split('[^a-zA-Z0-9_]', name, 1)[0]
|
||||||
|
if lang not in LANGUAGES:
|
||||||
|
raise RuntimeError('Language not supported: %s' % lang)
|
||||||
|
return LANGUAGES[lang]
|
||||||
|
|
||||||
|
|
||||||
def get_package(data_dir):
|
def get_package(data_dir):
|
||||||
if not isinstance(data_dir, six.string_types):
|
if not isinstance(data_dir, six.string_types):
|
||||||
raise RuntimeError('data_dir must be a string')
|
raise RuntimeError('data_dir must be a string')
|
||||||
|
@ -21,19 +36,20 @@ def get_package(data_dir):
|
||||||
|
|
||||||
|
|
||||||
def get_package_by_name(name=None, via=None):
|
def get_package_by_name(name=None, via=None):
|
||||||
|
package_name = name or about.__models__[about.__default_lang__]
|
||||||
|
lang = get_lang(package_name)
|
||||||
try:
|
try:
|
||||||
return sputnik.package(about.__title__, about.__version__,
|
return sputnik.package(about.__title__, about.__version__,
|
||||||
name or about.__models__[about.__default_model__]['package'],
|
package_name, data_path=via)
|
||||||
data_path=via)
|
|
||||||
except PackageNotFoundException as e:
|
except PackageNotFoundException as e:
|
||||||
raise RuntimeError("Model %s not installed. Please run 'python -m "
|
raise RuntimeError("Model '%s' not installed. Please run 'python -m "
|
||||||
"spacy.%s.download' to install latest compatible "
|
"%s.download' to install latest compatible "
|
||||||
"model." % (name, about.__models__[name]['module']))
|
"model." % (name, lang.__module__))
|
||||||
except CompatiblePackageNotFoundException as e:
|
except CompatiblePackageNotFoundException as e:
|
||||||
raise RuntimeError("Installed model %s is not compatible with spaCy "
|
raise RuntimeError("Installed model is not compatible with spaCy "
|
||||||
"version. Please run 'python -m spacy.%s.download "
|
"version. Please run 'python -m %s.download "
|
||||||
"--force' to install latest compatible model." %
|
"--force' to install latest compatible model." %
|
||||||
(name, about.__models__[name]['module']))
|
(lang.__module__))
|
||||||
|
|
||||||
|
|
||||||
def normalize_slice(length, start, stop, step=None):
|
def normalize_slice(length, start, stop, step=None):
|
||||||
|
|
|
@ -25,7 +25,6 @@ from . import attrs
|
||||||
from . import symbols
|
from . import symbols
|
||||||
|
|
||||||
from cymem.cymem cimport Address
|
from cymem.cymem cimport Address
|
||||||
from . import util
|
|
||||||
from .serialize.packer cimport Packer
|
from .serialize.packer cimport Packer
|
||||||
from .attrs cimport PROB
|
from .attrs cimport PROB
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user