diff --git a/website/meta/universe.json b/website/meta/universe.json index 1315d8a1c..4bdff5a10 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -4519,19 +4519,29 @@ "category": ["standalone"] }, { - "id": "quelquhui", - "title": "quelquhui", - "slogan": "tokenizer for contemporary french", - "description": "A tokenizer for french that handles inword parentheses like in _(b)rouille_, inclusive language (won't split _relecteur.rice.s_,but will split _mais.maintenant_), hyphens (split _peut-on_, or _pouvons-vous_ but not _tubulu-pimpant_), apostrophes (split _j'arrive_ or _j'arrivons_, but not _aujourd'hui_ or _r'garder_), emoticons, text-emoji (_:happy:_), urls, mails, ...", - "github": "thjbdvlt/quelquhui", - "code_example": ["import spacy", "import quelquhui", "nlp = spacy.load('fr_core_news_lg')", "nlp.tokenizer = quelquhui.Toquenizer(nlp.vocab)"], + "id": "presque", + "title": "presque", + "slogan": "normalizer for contemporary french.", + "description": "normalizer for french with focus on online and informal communication, _peùUUUt-èTRE_ becomes _peut-être_, _voilaaaa_ becomes _voilà_. it also harmonizes inclusive language (the user can choose how): by default, _auteur-rice-s-x et relecteur.xrices_ becomes _auteur·ricexs et relecteur·ricexs_.", + "github": "thjbdvlt/presque", + "code_example": [ + "import spacy", + "import presque", + "", + "@spacy.Language.factory('presque_normalizer')", + "def create_presque_normalizer(nlp, name='presque_normalizer'):", + "    return presque.Normalizer(nlp=nlp)", + "", + "nlp = spacy.load('fr_core_news_lg')", + "nlp.add_pipe('presque_normalizer', first=True)" + ], "code_language": "python", "author": "thjbdvlt", "author_links": { "github": "thjbdvlt" }, "category": ["pipeline"], - "tags": ["tokenizer", "french"] + "tags": ["normalizer", "french"] } ],