mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-28 10:14:07 +03:00
Put Mecab import in utility function
This commit is contained in:
parent
234a8a7591
commit
c435f748d7
|
@ -16,14 +16,21 @@ from collections import namedtuple
|
||||||
|
|
||||||
ShortUnitWord = namedtuple('ShortUnitWord', ['surface', 'base_form', 'part_of_speech'])
|
ShortUnitWord = namedtuple('ShortUnitWord', ['surface', 'base_form', 'part_of_speech'])
|
||||||
|
|
||||||
|
def try_mecab_import():
    """Import and return the MeCab module, which Japanese support requires.

    If it's not available, blow up and explain how to fix it.

    Returns:
        The imported ``MeCab`` module.

    Raises:
        ImportError: If ``MeCab`` cannot be imported. The message names the
            missing dependency and gives the mecab-python3 project URL.
    """
    try:
        # Keep the guarded region to the one statement that can fail.
        import MeCab
    except ImportError:
        # Swap the generic import failure for one that tells the user what
        # is missing and where to get it.
        raise ImportError("Japanese support requires MeCab: "
                          "https://github.com/SamuraiT/mecab-python3")
    return MeCab
|
||||||
|
|
||||||
class JapaneseTokenizer(object):
|
class JapaneseTokenizer(object):
|
||||||
def __init__(self, cls, nlp=None):
|
def __init__(self, cls, nlp=None):
|
||||||
self.vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp)
|
self.vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp)
|
||||||
try:
|
MeCab = try_mecab_import()
|
||||||
import MeCab
|
|
||||||
except ImportError:
|
|
||||||
raise ImportError("The Japanese tokenizer requires the MeCab library: "
|
|
||||||
"https://github.com/SamuraiT/mecab-python3")
|
|
||||||
self.tokenizer = MeCab.Tagger()
|
self.tokenizer = MeCab.Tagger()
|
||||||
|
|
||||||
def __call__(self, text):
|
def __call__(self, text):
|
||||||
|
@ -70,12 +77,7 @@ def detailed_tokens(tokenizer, text):
|
||||||
|
|
||||||
class JapaneseTagger(object):
|
class JapaneseTagger(object):
|
||||||
def __init__(self, vocab):
|
def __init__(self, vocab):
|
||||||
try:
|
MeCab = try_mecab_import()
|
||||||
import MeCab
|
|
||||||
except ImportError:
|
|
||||||
raise ImportError("The Japanese tagger requires the MeCab library: "
|
|
||||||
"https://github.com/SamuraiT/mecab-python3")
|
|
||||||
|
|
||||||
self.tagger = Tagger(vocab)
|
self.tagger = Tagger(vocab)
|
||||||
self.tokenizer = MeCab.Tagger()
|
self.tokenizer = MeCab.Tagger()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user