mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
* Restore merge_mwe in English class
This commit is contained in:
parent
3c270fc8ff
commit
6859f6adac
|
@ -36,6 +36,7 @@ def get_lex_props(string):
|
||||||
}
|
}
|
||||||
|
|
||||||
if_model_present = -1
|
if_model_present = -1
|
||||||
|
LOCAL_DATA_DIR = path.join(path.dirname(__file__), 'data')
|
||||||
|
|
||||||
|
|
||||||
class English(object):
|
class English(object):
|
||||||
|
@ -64,7 +65,7 @@ class English(object):
|
||||||
EntityTransitionSystem = BiluoPushDown
|
EntityTransitionSystem = BiluoPushDown
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
data_dir=path.join(path.dirname(__file__), 'data'),
|
data_dir=LOCAL_DATA_DIR
|
||||||
Tokenizer=Tokenizer.from_dir,
|
Tokenizer=Tokenizer.from_dir,
|
||||||
Tagger=EnPosTagger,
|
Tagger=EnPosTagger,
|
||||||
Parser=ParserFactory(ParserTransitionSystem),
|
Parser=ParserFactory(ParserTransitionSystem),
|
||||||
|
@ -106,7 +107,7 @@ class English(object):
|
||||||
('NNP', 'DATE', regexes.DAYS_RE),
|
('NNP', 'DATE', regexes.DAYS_RE),
|
||||||
('CD', 'MONEY', regexes.MONEY_RE)])
|
('CD', 'MONEY', regexes.MONEY_RE)])
|
||||||
|
|
||||||
def __call__(self, text, tag=True, parse=True, entity=True):
|
def __call__(self, text, tag=True, parse=True, entity=True, merge_mwes=False):
|
||||||
"""Apply the pipeline to some text. The text can span multiple sentences,
|
"""Apply the pipeline to some text. The text can span multiple sentences,
|
||||||
and can contain arbitrary whitespace. Alignment into the original string
|
and can contain arbitrary whitespace. Alignment into the original string
|
||||||
is preserved.
|
is preserved.
|
||||||
|
@ -130,6 +131,8 @@ class English(object):
|
||||||
self.parser(tokens)
|
self.parser(tokens)
|
||||||
if self.entity and entity:
|
if self.entity and entity:
|
||||||
self.entity(tokens)
|
self.entity(tokens)
|
||||||
|
if merge_mwes and self.mwe_merger is not None:
|
||||||
|
self.mwe_merger(tokens)
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
Loading…
Reference in New Issue
Block a user