mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
chore: added adept-augmentations to the spacy universe (#12609)
* chore: added adept-augmentations to the spacy universe * Apply suggestions from code review Co-authored-by: Basile Dura <bdura@users.noreply.github.com> * Update universe.json --------- Co-authored-by: Basile Dura <bdura@users.noreply.github.com>
This commit is contained in:
parent
15f16db6ca
commit
d11b549195
|
@ -2837,6 +2837,56 @@
|
|||
"tags": ["coreference", "multi-lingual", "cross-lingual", "allennlp"],
|
||||
"spacy_version": 3
|
||||
},
|
||||
{
|
||||
"id": "adeptaugmentations",
|
||||
"title": "Adept Augmentations",
|
||||
"slogan": " A Python library aimed at dissecting and augmenting NER training data for a few-shot scenario.",
|
||||
"description": "EntitySwapAugmenter takes either a `datasets.Dataset` or a `spacy.tokens.DocBin`. Additionally, it is optional to provide a set of labels. It initially creates a knowledge base of entities belonging to a certain label. When running `augmenter.augment()` for N runs, it then creates N new sentences with random swaps of the original entities with an entity of the same corresponding label from the knowledge base.\n\nFor example, assuming that we have knowledge base for `PERSONS`, `LOCATIONS` and `PRODUCTS`. We can then create additional data for the sentence \"Momofuko Ando created instant noodles in Osaka.\" using `augmenter.augment(N=2)`, resulting in \"David created instant noodles in Madrid.\" or \"Tom created Adept Augmentations in the Netherlands\".",
|
||||
"github": "davidberenstein1957/adept-augmentations",
|
||||
"pip": "adept-augmentations",
|
||||
"thumb": "https://raw.githubusercontent.com/Pandora-Intelligence/crosslingual-coreference/master/img/logo.png",
|
||||
"code_example": [
|
||||
"import spacy",
|
||||
"from spacy.tokens import DocBin",
|
||||
"",
|
||||
"from adept_augmentations import EntitySwapAugmenter",
|
||||
"",
|
||||
"nlp = spacy.load(\"en_core_web_sm\")",
|
||||
"",
|
||||
"TRAIN_DATA = [",
|
||||
" \"Apple is looking at buying U.K. startup for $1 billion\",",
|
||||
" \"Microsoft acquires GitHub for $7.5 billion\"",
|
||||
"]",
|
||||
"docs = nlp.pipe(TRAIN_DATA)",
|
||||
"",
|
||||
"# Create a new DocBin",
|
||||
"doc_bin = DocBin(docs=docs)",
|
||||
"",
|
||||
"# Augment Data",
|
||||
"doc_bin = EntitySwapAugmenter(doc_bin).augment(4)",
|
||||
"for doc in doc_bin.get_docs(nlp.vocab):",
|
||||
" print(doc.text)",
|
||||
"",
|
||||
"# Output",
|
||||
"#",
|
||||
"# GitHub is looking at buying U.K. startup for $ 7.5 billion",
|
||||
"# Microsoft is looking at buying U.K. startup for $ 1 billion",
|
||||
"# Microsoft is looking at buying U.K. startup for $ 7.5 billion",
|
||||
"# GitHub is looking at buying U.K. startup for $ 1 billion",
|
||||
"# Microsoft acquires Apple for $ 7.5 billion",
|
||||
"# Apple acquires Microsoft for $ 1 billion",
|
||||
"# Microsoft acquires Microsoft for $ 7.5 billion",
|
||||
"# GitHub acquires GitHub for $ 1 billion"
|
||||
],
|
||||
"author": "David Berenstein",
|
||||
"author_links": {
|
||||
"github": "davidberenstein1957",
|
||||
"website": "https://www.linkedin.com/in/david-berenstein-1bab11105/"
|
||||
},
|
||||
"category": ["standalone"],
|
||||
"tags": ["ner", "few-shot", "augmentation", "datasets", "training"],
|
||||
"spacy_version": 3
|
||||
},
|
||||
{
|
||||
"id": "blackstone",
|
||||
"title": "Blackstone",
|
||||
|
|
Loading…
Reference in New Issue
Block a user