diff --git a/website/meta/universe.json b/website/meta/universe.json index b39ebb528..e36ba5676 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -2837,6 +2837,56 @@ "tags": ["coreference", "multi-lingual", "cross-lingual", "allennlp"], "spacy_version": 3 }, + { + "id": "adeptaugmentations", + "title": "Adept Augmentations", + "slogan": "A Python library aimed at dissecting and augmenting NER training data for a few-shot scenario.", + "description": "EntitySwapAugmenter takes either a `datasets.Dataset` or a `spacy.tokens.DocBin`. Optionally, a set of labels can be provided. It initially creates a knowledge base of entities belonging to a certain label. When running `augmenter.augment()` for N runs, it then creates N new sentences in which the original entities are randomly swapped with entities of the same label from the knowledge base.\n\nFor example, assuming that we have a knowledge base for `PERSONS`, `LOCATIONS` and `PRODUCTS`, we can then create additional data for the sentence \"Momofuku Ando created instant noodles in Osaka.\" using `augmenter.augment(N=2)`, resulting in \"David created instant noodles in Madrid.\" or \"Tom created Adept Augmentations in the Netherlands.\"", + "github": "davidberenstein1957/adept-augmentations", + "pip": "adept-augmentations", + "thumb": "https://raw.githubusercontent.com/Pandora-Intelligence/crosslingual-coreference/master/img/logo.png", + "code_example": [ + "import spacy", + "from spacy.tokens import DocBin", + "", + "from adept_augmentations import EntitySwapAugmenter", + "", + "nlp = spacy.load(\"en_core_web_sm\")", + "", + "TRAIN_DATA = [", + " \"Apple is looking at buying U.K. startup for $1 billion\",", + " \"Microsoft acquires GitHub for $7.5 billion\"", + "]", + "docs = nlp.pipe(TRAIN_DATA)", + "", + "# Create a new DocBin", + "doc_bin = DocBin(docs=docs)", + "", + "# Augment Data", + "doc_bin = EntitySwapAugmenter(doc_bin).augment(4)", + "for doc in doc_bin.get_docs(nlp.vocab):", + " print(doc.text)", + "", + "# Output", + "#", + "# GitHub is looking at buying U.K. startup for $ 7.5 billion", + "# Microsoft is looking at buying U.K. startup for $ 1 billion", + "# Microsoft is looking at buying U.K. startup for $ 7.5 billion", + "# GitHub is looking at buying U.K. startup for $ 1 billion", + "# Microsoft acquires Apple for $ 7.5 billion", + "# Apple acquires Microsoft for $ 1 billion", + "# Microsoft acquires Microsoft for $ 7.5 billion", + "# GitHub acquires GitHub for $ 1 billion" + ], + "author": "David Berenstein", + "author_links": { + "github": "davidberenstein1957", + "website": "https://www.linkedin.com/in/david-berenstein-1bab11105/" + }, + "category": ["standalone"], + "tags": ["ner", "few-shot", "augmentation", "datasets", "training"], + "spacy_version": 3 + }, { "id": "blackstone", "title": "Blackstone",