docs: added reference to spacy-setfit to the spaCy Universe (#12737)

* docs: added reference to spacy-setfit * removed package import after adding factory entry points to packages
2026-02-02 13:36:18 +03:00 · 2023-06-19 15:52:07 +02:00 · 2023-06-19 15:52:07 +02:00 · 53c400bd7a
commit 53c400bd7a
parent 3125b97ace
1 changed files with 55 additions and 10 deletions
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@ -2739,10 +2739,9 @@
            "description": "Have you ever struggled with needing a [spaCy TextCategorizer](https://spacy.io/api/textcategorizer) but didn't have the time to train one from scratch? Classy Classification is the way to go! For few-shot classification using [sentence-transformers](https://github.com/UKPLab/sentence-transformers) or [spaCy models](https://spacy.io/usage/models), provide a dictionary with labels and examples, or just provide a list of labels for zero shot-classification with [Huggingface zero-shot classifiers](https://huggingface.co/models?pipeline_tag=zero-shot-classification).",
            "github": "davidberenstein1957/classy-classification",
            "pip": "classy-classification",
-            "thumb": "https://raw.githubusercontent.com/Pandora-Intelligence/classy-classification/master/logo.png",
+            "thumb": "https://raw.githubusercontent.com/davidberenstein1957/classy-classification/master/logo.png",
            "code_example": [
                "import spacy",
-                "import classy_classification",
                "",
                "data = {",
                "    \"furniture\": [\"This text is about chairs.\",",
@ -2787,14 +2786,13 @@
            "title": "Concise Concepts",
            "slogan": "Concise Concepts uses few-shot NER based on word embedding similarity to get you going with easy!",
            "description": "When wanting to apply NER to concise concepts, it is really easy to come up with examples, but it takes some effort to train an entire pipeline. Concise Concepts uses few-shot NER based on word embedding similarity to get you going with easy!",
-            "github": "pandora-intelligence/concise-concepts",
+            "github": "davidberenstein1957/concise-concepts",
            "pip": "concise-concepts",
-            "thumb": "https://raw.githubusercontent.com/Pandora-Intelligence/concise-concepts/master/img/logo.png",
-            "image": "https://raw.githubusercontent.com/Pandora-Intelligence/concise-concepts/master/img/example.png",
+            "thumb": "https://raw.githubusercontent.com/davidberenstein1957/concise-concepts/master/img/logo.png",
+            "image": "https://raw.githubusercontent.com/davidberenstein1957/concise-concepts/master/img/example.png",
            "code_example": [
                "import spacy",
                "from spacy import displacy",
-                "import concise_concepts",
                "",
                "data = {",
                "    \"fruit\": [\"apple\", \"pear\", \"orange\"],",
@ -2834,13 +2832,12 @@
            "title": "Crosslingual Coreference",
            "slogan": "One multi-lingual coreference model to rule them all!",
            "description": "Coreference is amazing but the data required for training a model is very scarce. In our case, the available training for non-English languages also data proved to be poorly annotated. Crosslingual Coreference therefore uses the assumption a trained model with English data and cross-lingual embeddings should work for other languages with similar sentence structure. Verified to work quite well for at least (EN, NL, DK, FR, DE).",
-            "github": "pandora-intelligence/crosslingual-coreference",
+            "github": "davidberenstein1957/crosslingual-coreference",
            "pip": "crosslingual-coreference",
-            "thumb": "https://raw.githubusercontent.com/Pandora-Intelligence/crosslingual-coreference/master/img/logo.png",
-            "image": "https://raw.githubusercontent.com/Pandora-Intelligence/crosslingual-coreference/master/img/example_total.png",
+            "thumb": "https://raw.githubusercontent.com/davidberenstein1957/crosslingual-coreference/master/img/logo.png",
+            "image": "https://raw.githubusercontent.com/davidberenstein1957/crosslingual-coreference/master/img/example_total.png",
            "code_example": [
                "import spacy",
-                "import crosslingual_coreference",
                "",
                "text = \"\"\"",
                "    Do not forget about Momofuku Ando!",
@ -2933,6 +2930,54 @@
            "tags": ["ner", "few-shot", "augmentation", "datasets", "training"],
            "spacy_version": 3
        },
+        {
+            "id": "spacysetfit",
+            "title": "spaCy-SetFit",
+            "slogan": "An easy and intuitive approach to use SetFit in combination with spaCy.",
+            "description": "spaCy-SetFit is a Python library that extends spaCy's text categorization capabilities by incorporating SetFit for few-shot classification. It allows you to train a text categorizer using an intuitive dictionary. \n\nThe library integrates with spaCy's pipeline architecture, enabling easy integration and configuration of the text categorizer component. You can provide a training dataset containing inlier and outlier examples, and spaCy-SetFit will use the paraphrase-MiniLM-L3-v2 model for training the text categorizer with SetFit. Once trained, you can use the categorizer to classify new text and obtain category probabilities.",
+            "github": "davidberenstein1957/spacy-setfit",
+            "pip": "spacy-setfit",
+            "thumb": "https://raw.githubusercontent.com/davidberenstein1957/spacy-setfit/main/logo.png",
+            "code_example": [
+            "import spacy",
+            "",
+            "# Create some example data",
+            "train_dataset = {",
+            "    \"inlier\": [",
+            "        \"Text about furniture\",",
+            "        \"Couches, benches and televisions.\",",
+            "        \"I really need to get a new sofa.\"",
+            "    ],",
+            "    \"outlier\": [",
+            "        \"Text about kitchen equipment\",",
+            "        \"This text is about politics\",",
+            "        \"Comments about AI and stuff.\"",
+            "    ]",
+            "}",
+            "",
+            "# Load the spaCy language model:",
+            "nlp = spacy.load(\"en_core_web_sm\")",
+            "",
+            "# Add the \"text_categorizer\" pipeline component to the spaCy model, and configure it with SetFit parameters:",
+            "nlp.add_pipe(\"text_categorizer\", config={",
+            "    \"pretrained_model_name_or_path\": \"paraphrase-MiniLM-L3-v2\",",
+            "    \"setfit_trainer_args\": {",
+            "        \"train_dataset\": train_dataset",
+            "    }",
+            "})",
+            "doc = nlp(\"I really need to get a new sofa.\")",
+            "doc.cats",
+            "# {'inlier': 0.902350975129, 'outlier': 0.097649024871}"
+            ],
+            "author": "David Berenstein",
+            "author_links": {
+                "github": "davidberenstein1957",
+                "website": "https://www.linkedin.com/in/david-berenstein-1bab11105/"
+            },
+            "category": ["pipeline"],
+            "tags": ["few-shot", "SetFit", "training"],
+            "spacy_version": 3
+        },
        {
            "id": "blackstone",
            "title": "Blackstone",