mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Wire up English lemma and morph rules.
This commit is contained in:
parent
f70be44746
commit
8dbff4f5f4
|
@ -31,6 +31,7 @@ class English(Language):
|
|||
tag_map = TAG_MAP
|
||||
stop_words = STOP_WORDS
|
||||
|
||||
morph_rules = dict(MORPH_RULES)
|
||||
lemma_rules = dict(LEMMA_RULES)
|
||||
lemma_index = dict(LEMMA_INDEX)
|
||||
lemma_exc = dict(LEMMA_EXC)
|
||||
|
|
|
@ -9,6 +9,9 @@ from .tag_map import TAG_MAP
|
|||
from .word_sets import STOP_WORDS, NUM_WORDS
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, ORTH_ONLY
|
||||
from .morph_rules import MORPH_RULES
|
||||
from .lemmatizer import RULES as LEMMA_RULES
|
||||
from .lemmatizer import INDEX as LEMMA_INDEX
|
||||
from .lemmatizer import EXC as LEMMA_EXC
|
||||
|
||||
|
||||
TAG_MAP = dict(TAG_MAP)
|
||||
|
@ -22,4 +25,5 @@ update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.EMOTICONS))
|
|||
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.ABBREVIATIONS))
|
||||
|
||||
|
||||
__all__ = ["TOKENIZER_EXCEPTIONS", "TAG_MAP", "STOP_WORDS", "MORPH_RULES"]
|
||||
__all__ = ["TOKENIZER_EXCEPTIONS", "TAG_MAP", "STOP_WORDS", "MORPH_RULES",
|
||||
"LEMMA_RULES", "LEMMA_INDEX", "LEMMA_EXC"]
|
||||
|
|
|
@ -770,5 +770,5 @@ ORTH_ONLY = [
|
|||
"Rev.",
|
||||
"Sen.",
|
||||
"St.",
|
||||
"vs."
|
||||
"vs.",
|
||||
]
|
||||
|
|
Loading…
Reference in New Issue
Block a user