mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-21 01:21:58 +03:00
add resource 'quelquhui'
This commit is contained in:
parent
82fc2ecfa5
commit
0d9d11bf4e
|
@ -1,3 +1,4 @@
|
|||
|
||||
{
|
||||
"resources": [
|
||||
{
|
||||
|
@ -4517,6 +4518,21 @@
|
|||
"website": "https://redfield.ai"
|
||||
},
|
||||
"category": ["standalone"]
|
||||
},
|
||||
{
|
||||
"id": "quelquhui",
|
||||
"title": "quelquhui",
|
||||
"slogan": "tokenizer for contemporary french",
|
||||
"description": "A tokenizer for french that handles inword parentheses like in _(b)rouille_, inclusive language (won't split _relecteur.rice.s_,but will split _mais.maintenant_), hyphens (split _peut-on_, or _pouvons-vous_ but not _tubulu-pimpant_), apostrophes (split _j'arrive_ or _j'arrivons_, but not _aujourd'hui_ or _r'garder_), emoticons, text-emoji (_:happy:_), urls, mails, ...",
|
||||
"github": "thjbdvlt/quelquhui",
|
||||
"code_example": ["import spacy", "import quelquhui", "nlp = spacy.load('fr_core_news_lg')", "nlp.tokenizer = quelquhui.Toquenizer(nlp.vocab)"],
|
||||
"code_language": "python",
|
||||
"author": "thjbdvlt",
|
||||
"author_links": {
|
||||
"github": "thjbdvlt"
|
||||
},
|
||||
"category": ["pipeline"],
|
||||
"tags": ["tokenizer", "french"]
|
||||
}
|
||||
],
|
||||
|
||||
|
@ -4608,3 +4624,4 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user