* Update Lemmatizer to use MockPackage. Replace from_package with load() classmethod

This commit is contained in:
Matthew Honnibal 2015-12-29 16:56:02 +01:00
parent 4131e45543
commit c5902f2b4b

View File

@ -8,25 +8,22 @@ except ImportError:
import json import json
from .parts_of_speech import NOUN, VERB, ADJ, PUNCT from .parts_of_speech import NOUN, VERB, ADJ, PUNCT
from .util import MockPackage
class Lemmatizer(object): class Lemmatizer(object):
@classmethod @classmethod
def from_package(cls, package): def load(cls, pkg_or_str_or_file):
pkg = MockPackage.create_or_return(pkg_or_str_or_file)
index = {} index = {}
exc = {} exc = {}
for pos in ['adj', 'noun', 'verb']: for pos in ['adj', 'noun', 'verb']:
index[pos] = package.load_utf8(read_index, with pkg.open(('wordnet', 'index.%s' % pos), default=None) as file_:
'wordnet', 'index.%s' % pos, index[pos] = read_index(file_) if file_ is not None else set()
default=set()) # TODO: really optional? with pkg.open(('wordnet', '%s.exc' % pos), default=None) as file_:
exc[pos] = package.load_utf8(read_exc, exc[pos] = read_exc(file_) if file_ is not None else {}
'wordnet', '%s.exc' % pos, with pkg.open(('vocab', 'lemma_rules.json'), default=None) as file_:
default={}) # TODO: really optional? rules = json.load(file_) if file_ is not None else {}
rules = package.load_utf8(json.load,
'vocab', 'lemma_rules.json',
default={}) # TODO: really optional?
return cls(index, exc, rules) return cls(index, exc, rules)
def __init__(self, index, exceptions, rules): def __init__(self, index, exceptions, rules):