Adding spacy-cleaner to the spaCy universe (#11674)

* added spacy-cleaner to the spaCy universe * Move data to right section of universe.json * Cleanup - fix typo ("replacers") - spaCy doesn't need to be marked as code - lemma of "Hello" is lower case Co-authored-by: Paul O'Leary McCann <polm@dampfkraft.com>
2026-02-17 04:30:49 +03:00 · 2022-10-20 12:38:29 +01:00 · 2022-10-20 12:38:29 +01:00 · b69d249a22
commit b69d249a22
parent bf83f6872a
1 changed files with 41 additions and 0 deletions
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -1,5 +1,46 @@
 {
    "resources": [
+        {
+            "id": "spacy-cleaner",
+            "title": "spacy-cleaner",
+            "slogan": "Easily clean text with spaCy!",
+            "description": "**spacy-cleaner** utilises spaCy `Language` models to replace, remove, and mutate spaCy tokens. Cleaning actions available are:\n\n* Remove/replace stopwords.\n* Remove/replace punctuation.\n* Remove/replace numbers.\n* Remove/replace emails.\n* Remove/replace URLs.\n* Perform lemmatisation.\n\nSee our [docs](https://ce11an.github.io/spacy-cleaner/) for more information.",
+            "github": "Ce11an/spacy-cleaner",
+            "pip": "spacy-cleaner",
+            "code_example": [
+                "import spacy",
+                "import spacy_cleaner",
+                "from spacy_cleaner.processing import removers, replacers, mutators",
+                "",
+                "model = spacy.load(\"en_core_web_sm\")",
+                "pipeline = spacy_cleaner.Pipeline(",
+                "    model,",
+                "    removers.remove_stopword_token,",
+                "    replacers.replace_punctuation_token,",
+                "    mutators.mutate_lemma_token,",
+                ")",
+                "",
+                "texts = [\"Hello, my name is Cellan! I love to swim!\"]",
+                "",
+                "pipeline.clean(texts)",
+                "# ['hello _IS_PUNCT_ Cellan _IS_PUNCT_ love swim _IS_PUNCT_']"
+            ],
+            "code_language": "python",
+            "url": "https://ce11an.github.io/spacy-cleaner/",
+            "image": "https://raw.githubusercontent.com/Ce11an/spacy-cleaner/main/docs/assets/images/spacemen.png",
+            "author": "Cellan Hall",
+            "author_links": {
+                "twitter": "Ce11an",
+                "github": "Ce11an",
+                "website": "https://www.linkedin.com/in/cellan-hall/"
+            },
+            "category": [
+                "extension"
+            ],
+            "tags": [
+                "text-processing"
+            ]
+        },
        {
            "id": "Zshot",
            "title": "Zshot",