From c5902f2b4bb4f5b0154cf12064b3180e02f5f960 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Tue, 29 Dec 2015 16:56:02 +0100
Subject: [PATCH] * Upd Lemmatizer to use MockPackage. Replace from_package with load() classmethod

---
 spacy/lemmatizer.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py
index c5b9c1c50..7cd37a331 100644
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -8,25 +8,22 @@ except ImportError:
     import json
 
 from .parts_of_speech import NOUN, VERB, ADJ, PUNCT
+from .util import MockPackage
 
 
 class Lemmatizer(object):
     @classmethod
-    def from_package(cls, package):
+    def load(cls, pkg_or_str_or_file):
+        pkg = MockPackage.create_or_return(pkg_or_str_or_file)
         index = {}
         exc = {}
         for pos in ['adj', 'noun', 'verb']:
-            index[pos] = package.load_utf8(read_index,
-                'wordnet', 'index.%s' % pos,
-                default=set())  # TODO: really optional?
-            exc[pos] = package.load_utf8(read_exc,
-                'wordnet', '%s.exc' % pos,
-                default={})  # TODO: really optional?
-
-        rules = package.load_utf8(json.load,
-            'vocab', 'lemma_rules.json',
-            default={})  # TODO: really optional?
-
+            with pkg.open(('wordnet', 'index.%s' % pos), default=None) as file_:
+                index[pos] = read_index(file_) if file_ is not None else set()
+            with pkg.open(('wordnet', '%s.exc' % pos), default=None) as file_:
+                exc[pos] = read_exc(file_) if file_ is not None else {}
+        with pkg.open(('vocab', 'lemma_rules.json'), default=None) as file_:
+            rules = json.load(file_) if file_ is not None else {}
         return cls(index, exc, rules)
 
     def __init__(self, index, exceptions, rules):