mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-24 16:24:16 +03:00
Adding spacy-cleaner
to the spaCy universe (#11674)
* added spacy-cleaner to the spaCy universe * Move data to right section of universe.json * Cleanup - fix typo ("replacers") - spaCy doesn't need to be marked as code - lemma of "Hello" is lower case Co-authored-by: Paul O'Leary McCann <polm@dampfkraft.com>
This commit is contained in:
parent
bf83f6872a
commit
b69d249a22
|
@ -1,5 +1,46 @@
|
||||||
{
|
{
|
||||||
"resources": [
|
"resources": [
|
||||||
|
{
|
||||||
|
"id": "spacy-cleaner",
|
||||||
|
"title": "spacy-cleaner",
|
||||||
|
"slogan": "Easily clean text with spaCy!",
|
||||||
|
"description": "**spacy-cleaner** utilises spaCy `Language` models to replace, remove, and mutate spaCy tokens. Cleaning actions available are:\n\n* Remove/replace stopwords.\n* Remove/replace punctuation.\n* Remove/replace numbers.\n* Remove/replace emails.\n* Remove/replace URLs.\n* Perform lemmatisation.\n\nSee our [docs](https://ce11an.github.io/spacy-cleaner/) for more information.",
|
||||||
|
"github": "Ce11an/spacy-cleaner",
|
||||||
|
"pip": "spacy-cleaner",
|
||||||
|
"code_example": [
|
||||||
|
"import spacy",
|
||||||
|
"import spacy_cleaner",
|
||||||
|
"from spacy_cleaner.processing import removers, replacers, mutators",
|
||||||
|
"",
|
||||||
|
"model = spacy.load(\"en_core_web_sm\")",
|
||||||
|
"pipeline = spacy_cleaner.Pipeline(",
|
||||||
|
" model,",
|
||||||
|
" removers.remove_stopword_token,",
|
||||||
|
" replacers.replace_punctuation_token,",
|
||||||
|
" mutators.mutate_lemma_token,",
|
||||||
|
")",
|
||||||
|
"",
|
||||||
|
"texts = [\"Hello, my name is Cellan! I love to swim!\"]",
|
||||||
|
"",
|
||||||
|
"pipeline.clean(texts)",
|
||||||
|
"# ['hello _IS_PUNCT_ Cellan _IS_PUNCT_ love swim _IS_PUNCT_']"
|
||||||
|
],
|
||||||
|
"code_language": "python",
|
||||||
|
"url": "https://ce11an.github.io/spacy-cleaner/",
|
||||||
|
"image": "https://raw.githubusercontent.com/Ce11an/spacy-cleaner/main/docs/assets/images/spacemen.png",
|
||||||
|
"author": "Cellan Hall",
|
||||||
|
"author_links": {
|
||||||
|
"twitter": "Ce11an",
|
||||||
|
"github": "Ce11an",
|
||||||
|
"website": "https://www.linkedin.com/in/cellan-hall/"
|
||||||
|
},
|
||||||
|
"category": [
|
||||||
|
"extension"
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
"text-processing"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "Zshot",
|
"id": "Zshot",
|
||||||
"title": "Zshot",
|
"title": "Zshot",
|
||||||
|
|
Loading…
Reference in New Issue
Block a user