mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-24 00:04:15 +03:00
Adding spacy-cleaner
to the spaCy universe (#11674)
* added spacy-cleaner to the spaCy universe * Move data to right section of universe.json * Cleanup - fix typo ("replacers") - spaCy doesn't need to be marked as code - lemma of "Hello" is lower case Co-authored-by: Paul O'Leary McCann <polm@dampfkraft.com>
This commit is contained in:
parent
bf83f6872a
commit
b69d249a22
|
@ -1,5 +1,46 @@
|
|||
{
|
||||
"resources": [
|
||||
{
|
||||
"id": "spacy-cleaner",
|
||||
"title": "spacy-cleaner",
|
||||
"slogan": "Easily clean text with spaCy!",
|
||||
"description": "**spacy-cleaner** utilises spaCy `Language` models to replace, remove, and \n mutate spaCy tokens. Cleaning actions available are:\n\n* Remove/replace stopwords.\n* Remove/replace punctuation.\n* Remove/replace numbers.\n* Remove/replace emails.\n* Remove/replace URLs.\n* Perform lemmatisation.\n\nSee our [docs](https://ce11an.github.io/spacy-cleaner/) for more information.",
|
||||
"github": "Ce11an/spacy-cleaner",
|
||||
"pip": "spacy-cleaner",
|
||||
"code_example": [
|
||||
"import spacy",
|
||||
"import spacy_cleaner",
|
||||
"from spacy_cleaner.processing import removers, replacers, mutators",
|
||||
"",
|
||||
"model = spacy.load(\"en_core_web_sm\")",
|
||||
"pipeline = spacy_cleaner.Pipeline(",
|
||||
" model,",
|
||||
" removers.remove_stopword_token,",
|
||||
" replacers.replace_punctuation_token,",
|
||||
" mutators.mutate_lemma_token,",
|
||||
")",
|
||||
"",
|
||||
"texts = [\"Hello, my name is Cellan! I love to swim!\"]",
|
||||
"",
|
||||
"pipeline.clean(texts)",
|
||||
"# ['hello _IS_PUNCT_ Cellan _IS_PUNCT_ love swim _IS_PUNCT_']"
|
||||
],
|
||||
"code_language": "python",
|
||||
"url": "https://ce11an.github.io/spacy-cleaner/",
|
||||
"image": "https://raw.githubusercontent.com/Ce11an/spacy-cleaner/main/docs/assets/images/spacemen.png",
|
||||
"author": "Cellan Hall",
|
||||
"author_links": {
|
||||
"twitter": "Ce11an",
|
||||
"github": "Ce11an",
|
||||
"website": "https://www.linkedin.com/in/cellan-hall/"
|
||||
},
|
||||
"category": [
|
||||
"extension"
|
||||
],
|
||||
"tags": [
|
||||
"text-processing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "Zshot",
|
||||
"title": "Zshot",
|
||||
|
|
Loading…
Reference in New Issue
Block a user