mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
Improve rules: they are now title-case insensitive and share the same declaration format
This commit is contained in:
parent
ed5f094451
commit
ad8129cb45
|
@ -117,24 +117,28 @@ def get_tokenizer_exceptions():
|
||||||
for verb, verb_lemma in (("a", "avoir"), ("est", "être"),
|
for verb, verb_lemma in (("a", "avoir"), ("est", "être"),
|
||||||
("semble", "sembler"), ("indique", "indiquer"),
|
("semble", "sembler"), ("indique", "indiquer"),
|
||||||
("moque", "moquer"), ("passe", "passer")):
|
("moque", "moquer"), ("passe", "passer")):
|
||||||
|
for orth in [verb,verb.title()]:
|
||||||
for pronoun in ("elle", "il", "on"):
|
for pronoun in ("elle", "il", "on"):
|
||||||
token = "{}-t-{}".format(verb, pronoun)
|
token = "{}-t-{}".format(orth, pronoun)
|
||||||
VERBS[token] = [
|
VERBS[token] = [
|
||||||
{LEMMA: verb_lemma, ORTH: verb},
|
{LEMMA: verb_lemma, ORTH: orth, TAG: "VERB"},
|
||||||
{LEMMA: "t", ORTH: "-t"},
|
{LEMMA: "t", ORTH: "-t"},
|
||||||
{LEMMA: pronoun, ORTH: "-" + pronoun}
|
{LEMMA: pronoun, ORTH: "-" + pronoun}
|
||||||
]
|
]
|
||||||
|
|
||||||
VERBS['est-ce'] = [
|
for verb, verb_lemma in [("est","être")]:
|
||||||
{LEMMA: 'être', ORTH: "est"},
|
for orth in [verb,verb.title()]:
|
||||||
|
token = "{}-ce".format(orth)
|
||||||
|
VERBS[token] = [
|
||||||
|
{LEMMA: verb_lemma, ORTH: orth, TAG: "VERB"},
|
||||||
{LEMMA: 'ce', ORTH: '-ce'}
|
{LEMMA: 'ce', ORTH: '-ce'}
|
||||||
]
|
]
|
||||||
|
|
||||||
for pre, pre_lemma in (("qu'", "que"), ("Qu'", "Que"), ("n'", "ne"),
|
for pre, pre_lemma in (("qu'", "que"), ("n'", "ne")):
|
||||||
("N'", "Ne")):
|
for orth in [pre,pre.title()]:
|
||||||
VERBS['{}est-ce'.format(pre)] = [
|
VERBS['{}est-ce'.format(orth)] = [
|
||||||
{LEMMA: pre_lemma, ORTH: pre},
|
{LEMMA: pre_lemma, ORTH: orth},
|
||||||
{LEMMA: 'être', ORTH: "est"},
|
{LEMMA: 'être', ORTH: "est", TAG: "VERB"},
|
||||||
{LEMMA: 'ce', ORTH: '-ce'}
|
{LEMMA: 'ce', ORTH: '-ce'}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user