* Restore merge_mwe in English class

This commit is contained in:
Matthew Honnibal 2015-07-08 19:34:55 +02:00
parent 3c270fc8ff
commit 6859f6adac

View File

@ -36,6 +36,7 @@ def get_lex_props(string):
} }
if_model_present = -1 if_model_present = -1
LOCAL_DATA_DIR = path.join(path.dirname(__file__), 'data')
class English(object): class English(object):
@ -64,7 +65,7 @@ class English(object):
EntityTransitionSystem = BiluoPushDown EntityTransitionSystem = BiluoPushDown
def __init__(self, def __init__(self,
data_dir=path.join(path.dirname(__file__), 'data'), data_dir=LOCAL_DATA_DIR
Tokenizer=Tokenizer.from_dir, Tokenizer=Tokenizer.from_dir,
Tagger=EnPosTagger, Tagger=EnPosTagger,
Parser=ParserFactory(ParserTransitionSystem), Parser=ParserFactory(ParserTransitionSystem),
@ -106,7 +107,7 @@ class English(object):
('NNP', 'DATE', regexes.DAYS_RE), ('NNP', 'DATE', regexes.DAYS_RE),
('CD', 'MONEY', regexes.MONEY_RE)]) ('CD', 'MONEY', regexes.MONEY_RE)])
def __call__(self, text, tag=True, parse=True, entity=True): def __call__(self, text, tag=True, parse=True, entity=True, merge_mwes=False):
"""Apply the pipeline to some text. The text can span multiple sentences, """Apply the pipeline to some text. The text can span multiple sentences,
and can contain arbitrary whitespace. Alignment into the original string and can contain arbitrary whitespace. Alignment into the original string
is preserved. is preserved.
@ -130,6 +131,8 @@ class English(object):
self.parser(tokens) self.parser(tokens)
if self.entity and entity: if self.entity and entity:
self.entity(tokens) self.entity(tokens)
if merge_mwes and self.mwe_merger is not None:
self.mwe_merger(tokens)
return tokens return tokens
@property @property