Mirror of https://github.com/explosion/spaCy.git, synced 2025-03-12 15:25:47 +03:00

isort + flake8 fixed

This commit is contained in:
parent 396266904e
commit f3967e8b91

@@ -5,11 +5,7 @@ from thinc.api import Model
 from ...language import BaseDefaults, Language
 from .lemmatizer import FrenchLemmatizer
 from .lex_attrs import LEX_ATTRS
-from .punctuation import (
-    TOKENIZER_INFIXES,
-    TOKENIZER_PREFIXES,
-    TOKENIZER_SUFFIXES,
-)
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
 from .stop_words import STOP_WORDS
 from .syntax_iterators import SYNTAX_ITERATORS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
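
The consolidation above is what isort produces once the joined import fits within the configured line length. A minimal sketch, assuming isort 5+ and the "black" profile (an assumption, not read from this commit):

# Minimal sketch (assumptions: isort >= 5 is installed, "black" profile in use):
# isort collapses a parenthesised import back to a single line when the joined
# statement fits within the configured line length (88 with the black profile).
import isort

src = """\
from .punctuation import (
    TOKENIZER_INFIXES,
    TOKENIZER_PREFIXES,
    TOKENIZER_SUFFIXES,
)
"""

print(isort.code(src, profile="black"))
# Expected:
# from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES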

@@ -28,7 +28,7 @@ sentences = [
     "Sinon mets-en un peu par terre.",
     "il n'y a plus rien ici, enfin j'crois, nos p'tites affaires ont été enl'vées.",
     "aujourd'hui, c'est comme ça.",
-    "un.e directeur.ice, des employé.es, ",
+    "un.e directeur.ice, des employé.es.",
     "des non-humain-es étaient là aussi, visiblement heureux·ses.",
     "j'ai trouvé ça surhttps://site_inexistant.fr/accueil#milieu ou www.quelque_part.com/ je pense.",
     "ou alors le 21/12 oui c'est ça c'était le 21/12/2023... ou alors le 12.02.2005",
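
These example sentences exercise elision, hyphenated inversion, inclusive forms, URLs and dates. A minimal sketch of running one of them through the French tokenizer, assuming a spaCy install (the exact token split depends on the rules in this branch):

# Minimal sketch (assumes spaCy is installed): push one example sentence through
# a blank French pipeline, which only applies the tokenizer from the fr defaults.
import spacy

nlp = spacy.blank("fr")
doc = nlp("Sinon mets-en un peu par terre.")
print([token.text for token in doc])  # exact split depends on the fr rules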

@@ -5,9 +5,9 @@ from ..char_classes import (
     CONCAT_QUOTES,
     CURRENCY,
     LIST_ELLIPSES,
+    LIST_ICONS,
     LIST_PUNCT,
     LIST_QUOTES,
-    LIST_ICONS,
     UNITS,
 )

@@ -20,12 +20,12 @@ _suffix_inversion = [
     "nous", "vous", "elles", "ils", "iels",
     "moi", "toi", "lui", "leur", "eux",
     # to avoid matching: Villar-le-bois
-    fr"la(?![{HYPHENS}])",
-    fr"le(?![{HYPHENS}])",
+    fr"la(?![{HYPHENS}])",
+    fr"le(?![{HYPHENS}])",
     fr"les(?![{HYPHENS}])",
     fr"en(?![{HYPHENS}])", "y",
     # a-t-on, a-t'on
-    fr"t[{HYPHENS}]??[{ELISION}]?",
+    fr"t[{HYPHENS}]??[{ELISION}]?",
     fr"m[{ELISION}]?",
     "là", "ici",
 ]
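
The negative lookahead in these patterns is what keeps hyphenated place names in one piece. A small sketch of the idea, with HYPHENS reduced to a stand-in set rather than spaCy's actual char class, and the pattern simplified rather than copied from this branch:

# Minimal sketch: the (?![...]) lookahead lets "-le" match as an inversion
# suffix in "Donne-le" but not inside "Villar-le-bois", where "le" is
# immediately followed by another hyphen.
import re

HYPHENS = "-–—"  # assumption: reduced set of hyphen characters
inversion = re.compile(rf"[{HYPHENS}](?:la|le|les)(?![{HYPHENS}])")

print(inversion.search("Donne-le"))        # matches "-le"
print(inversion.search("Villar-le-bois"))  # None: "-le" is followed by "-"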

@@ -1,8 +1,7 @@
-from ...util import update_exc
 from ...symbols import NORM, ORTH
+from ...util import update_exc
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
-

 _exc = {
     "St": [{ORTH: "St", NORM: "Saint"}],
     "Ste": [{ORTH: "Ste", NORM: "Sainte"}],
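
For context on the imports being reordered here: update_exc merges the shared BASE_EXCEPTIONS with the language-specific special cases. A minimal, self-contained sketch:

# Minimal sketch: update_exc merges the shared BASE_EXCEPTIONS with the French
# special cases and checks that each entry's ORTH pieces add back up to the
# original string.
from spacy.lang.tokenizer_exceptions import BASE_EXCEPTIONS
from spacy.symbols import NORM, ORTH
from spacy.util import update_exc

_exc = {
    "St": [{ORTH: "St", NORM: "Saint"}],
    "Ste": [{ORTH: "Ste", NORM: "Sainte"}],
}

TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
print(TOKENIZER_EXCEPTIONS["St"])  # attribute IDs as keys, since ORTH/NORM are symbols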