mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
* Upd Lemmatizer to use MockPackage. Replace from_package with load() classmethod
This commit is contained in:
parent
4131e45543
commit
c5902f2b4b
|
@ -8,25 +8,22 @@ except ImportError:
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from .parts_of_speech import NOUN, VERB, ADJ, PUNCT
|
from .parts_of_speech import NOUN, VERB, ADJ, PUNCT
|
||||||
|
from .util import MockPackage
|
||||||
|
|
||||||
|
|
||||||
class Lemmatizer(object):
|
class Lemmatizer(object):
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_package(cls, package):
|
def load(cls, pkg_or_str_or_file):
|
||||||
|
pkg = MockPackage.create_or_return(pkg_or_str_or_file)
|
||||||
index = {}
|
index = {}
|
||||||
exc = {}
|
exc = {}
|
||||||
for pos in ['adj', 'noun', 'verb']:
|
for pos in ['adj', 'noun', 'verb']:
|
||||||
index[pos] = package.load_utf8(read_index,
|
with pkg.open(('wordnet', 'index.%s' % pos), default=None) as file_:
|
||||||
'wordnet', 'index.%s' % pos,
|
index[pos] = read_index(file_) if file_ is not None else set()
|
||||||
default=set()) # TODO: really optional?
|
with pkg.open(('wordnet', '%s.exc' % pos), default=None) as file_:
|
||||||
exc[pos] = package.load_utf8(read_exc,
|
exc[pos] = read_exc(file_) if file_ is not None else {}
|
||||||
'wordnet', '%s.exc' % pos,
|
with pkg.open(('vocab', 'lemma_rules.json'), default=None) as file_:
|
||||||
default={}) # TODO: really optional?
|
rules = json.load(file_) if file_ is not None else {}
|
||||||
|
|
||||||
rules = package.load_utf8(json.load,
|
|
||||||
'vocab', 'lemma_rules.json',
|
|
||||||
default={}) # TODO: really optional?
|
|
||||||
|
|
||||||
return cls(index, exc, rules)
|
return cls(index, exc, rules)
|
||||||
|
|
||||||
def __init__(self, index, exceptions, rules):
|
def __init__(self, index, exceptions, rules):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user