chore: added adept-augmentations to the spacy universe
This commit is contained in: parent cbc6bcf434, commit c88dff22a9
@@ -2810,6 +2810,54 @@
        "tags": ["coreference", "multi-lingual", "cross-lingual", "allennlp"],
        "spacy_version": 3
    },
    {
        "id": "adeptaugmentations",
        "title": "Adept Augmentations",
"slogan": " A Python library aimed at dissecting and augmenting NER training data for a few-shot scenario.",
|
||||
"description": "EntitySwapAugmenter takes either a datasets.Dataset or a spacy.tokens.DocBin. Additionally, it is optional to provide a set of labels. It initially creates a knowledge base of entities belonging to a certain label. When running augmenter.augment() for N runs, it then creates N new sentences with random swaps of the original entities with an entity of the same corresponding label from the knowledge base. For example, assuming that we have knowledge base for PERSONS, LOCATIONS and PRODUCTS. We can then create additional data for the sentence \"Momofuko Ando created instant noodles in Osaka.\" using augmenter.augment(N=2), resulting in \"David created instant noodles in Madrid.\" or \"Tom created Adept Augmentations in the Netherlands\".",
|
||||
"github": "davidberenstein1957/adept-augmentations",
|
||||
"pip": "adept-augmentations",
|
||||
"thumb": "https://raw.githubusercontent.com/Pandora-Intelligence/crosslingual-coreference/master/img/logo.png",
|
||||
"code_example": [
|
||||
"import spacy",
|
||||
"from spacy.tokens import DocBin",
|
||||
"",
|
||||
"from adept_augmentations import EntitySwapAugmenter",
|
||||
"",
|
||||
"TRAIN_DATA = [",
|
||||
" \"Apple is looking at buying U.K. startup for $1 billion\"",
|
||||
" \"Microsoft acquires GitHub for $7.5 billion\"",
|
||||
"]",
|
||||
"docs = nlp.pipe(TRAIN_DATA)",
|
||||
"",
|
||||
"# Create a new DocBin",
|
||||
"doc_bin = DocBin(docs=docs)",
|
||||
"",
|
||||
"# Augment Data",
|
||||
"doc_bin = EntitySwapAugmenter(doc_bin).augment(4)",
|
||||
"for doc in doc_bin.get_docs(nlp.vocab):",
|
||||
" print(doc.text)",
|
||||
"",
|
||||
"# Output",
|
||||
"#",
|
||||
"# GitHub is looking at buying U.K. startup for $ 7.5 billion",
|
||||
"# Microsoft is looking at buying U.K. startup for $ 1 billion",
|
||||
"# Microsoft is looking at buying U.K. startup for $ 7.5 billion",
|
||||
"# GitHub is looking at buying U.K. startup for $ 1 billion",
|
||||
"# Microsoft acquires Apple for $ 7.5 billion",
|
||||
"# Apple acquires Microsoft for $ 1 billion",
|
||||
"# Microsoft acquires Microsoft for $ 7.5 billion",
|
||||
"# GitHub acquires GitHub for $ 1 billion"
|
||||
],
|
||||
"author": "David Berenstein",
|
||||
"author_links": {
|
||||
"github": "davidberenstein1957",
|
||||
"website": "https://www.linkedin.com/in/david-berenstein-1bab11105/"
|
||||
},
|
||||
"category": ["standalone"],
|
||||
"tags": ["ner", "few-shot", "augmentation", "datasets", "training"],
|
||||
"spacy_version": 3
|
||||
},
|
||||
    {
        "id": "blackstone",
        "title": "Blackstone",
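The entry's code_example covers only the spacy.tokens.DocBin path. Since the description also advertises datasets.Dataset support, here is a minimal sketch of that path; the CoNLL-style "tokens"/"ner_tags" columns and the integer tag scheme are assumptions about the expected dataset layout, not confirmed adept-augmentations API:

# Hypothetical sketch of the datasets.Dataset path; the column names and
# tag scheme below are assumptions, not confirmed adept-augmentations API.
from datasets import Dataset

from adept_augmentations import EntitySwapAugmenter

# Tiny token-level NER dataset; tags use a toy integer scheme
# (0 = O, 1 = B-ORG, 2 = B-GPE).
dataset = Dataset.from_dict(
    {
        "tokens": [
            ["Apple", "is", "buying", "a", "U.K.", "startup"],
            ["Microsoft", "acquires", "GitHub"],
        ],
        "ner_tags": [
            [1, 0, 0, 0, 2, 0],
            [1, 0, 1],
        ],
    }
)

# Build the per-label entity knowledge base and create four
# entity-swapped copies of each example, as in the DocBin example above.
augmented = EntitySwapAugmenter(dataset).augment(4)
print(augmented)

As in the DocBin example, entities of the same label are swapped between the two sentences; only the input container differs.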