mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-14 18:22:27 +03:00
add resource 'presque' to universe.json
This commit is contained in:
parent
3fa464b8c7
commit
cb1f9457b4
|
@ -4519,19 +4519,29 @@
|
||||||
"category": ["standalone"]
|
"category": ["standalone"]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "quelquhui",
|
"id": "presque",
|
||||||
"title": "quelquhui",
|
"title": "presque",
|
||||||
"slogan": "tokenizer for contemporary french",
|
"slogan": "normalizer for contemporary french.",
|
||||||
"description": "A tokenizer for french that handles inword parentheses like in _(b)rouille_, inclusive language (won't split _relecteur.rice.s_,but will split _mais.maintenant_), hyphens (split _peut-on_, or _pouvons-vous_ but not _tubulu-pimpant_), apostrophes (split _j'arrive_ or _j'arrivons_, but not _aujourd'hui_ or _r'garder_), emoticons, text-emoji (_:happy:_), urls, mails, ...",
|
"description": "Normalizer for french with focus on online and informal communication, _peùUUUt-èTRE_ becomes _peut-être_, _voilaaaa_ becomes _voilà_. It also harmonizes inclusive language (the user can choose how): by default, _auteur-rice-s-x et relecteur.xrices_ becomes _auteur·ricexs et relecteur·ricexs_.",
|
||||||
"github": "thjbdvlt/quelquhui",
|
"github": "thjbdvlt/presque",
|
||||||
"code_example": ["import spacy", "import quelquhui", "nlp = spacy.load('fr_core_news_lg')", "nlp.tokenizer = quelquhui.Toquenizer(nlp.vocab)"],
|
"code_example": [
|
||||||
|
"import spacy",
|
||||||
|
"import presque",
|
||||||
|
"",
|
||||||
|
"@spacy.Language.factory('presque_normalizer')",
|
||||||
|
"def create_presque_normalizer(nlp, name='presque_normalizer'):",
|
||||||
|
"    return presque.Normalizer(nlp=nlp)",
|
||||||
|
"",
|
||||||
|
"nlp = spacy.load('fr_core_news_lg')",
|
||||||
|
"nlp.add_pipe('presque_normalizer', first=True)"
|
||||||
|
],
|
||||||
"code_language": "python",
|
"code_language": "python",
|
||||||
"author": "thjbdvlt",
|
"author": "thjbdvlt",
|
||||||
"author_links": {
|
"author_links": {
|
||||||
"github": "thjbdvlt"
|
"github": "thjbdvlt"
|
||||||
},
|
},
|
||||||
"category": ["pipeline"],
|
"category": ["pipeline"],
|
||||||
"tags": ["tokenizer", "french"]
|
"tags": ["normalizer", "french"]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user