mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	chore: added adept-augmentations to the spacy universe (#12609)
* chore: added adept-augmentations to the spacy universe * Apply suggestions from code review Co-authored-by: Basile Dura <bdura@users.noreply.github.com> * Update universe.json --------- Co-authored-by: Basile Dura <bdura@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									54d9198e62
								
							
						
					
					
						commit
						81488fa88b
					
				| 
						 | 
					@ -2837,6 +2837,56 @@
 | 
				
			||||||
            "tags": ["coreference", "multi-lingual", "cross-lingual", "allennlp"],
 | 
					            "tags": ["coreference", "multi-lingual", "cross-lingual", "allennlp"],
 | 
				
			||||||
            "spacy_version": 3
 | 
					            "spacy_version": 3
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            "id": "adeptaugmentations",
 | 
				
			||||||
 | 
					            "title": "Adept Augmentations",
 | 
				
			||||||
 | 
					            "slogan": "A Python library aimed at dissecting and augmenting NER training data for a few-shot scenario.",
 | 
				
			||||||
 | 
					            "description": "EntitySwapAugmenter takes either a `datasets.Dataset` or a `spacy.tokens.DocBin`. Additionally, it is optional to provide a set of labels. It initially creates a knowledge base of entities belonging to a certain label. When running `augmenter.augment()` for N runs, it then creates N new sentences with random swaps of the original entities with an entity of the same corresponding label from the knowledge base.\n\nFor example, assume that we have a knowledge base for `PERSONS`, `LOCATIONS` and `PRODUCTS`. We can then create additional data for the sentence \"Momofuku Ando created instant noodles in Osaka.\" using `augmenter.augment(N=2)`, resulting in \"David created instant noodles in Madrid.\" or \"Tom created Adept Augmentations in the Netherlands\".",
 | 
				
			||||||
 | 
					            "github": "davidberenstein1957/adept-augmentations",
 | 
				
			||||||
 | 
					            "pip": "adept-augmentations",
 | 
				
			||||||
 | 
					            "thumb": "https://raw.githubusercontent.com/Pandora-Intelligence/crosslingual-coreference/master/img/logo.png",
 | 
				
			||||||
 | 
					            "code_example": [
 | 
				
			||||||
 | 
					                "import spacy",
 | 
				
			||||||
 | 
					                "from spacy.tokens import DocBin",
 | 
				
			||||||
 | 
					                "",
 | 
				
			||||||
 | 
					                "from adept_augmentations import EntitySwapAugmenter",
 | 
				
			||||||
 | 
					                "",
 | 
				
			||||||
 | 
					                "nlp = spacy.load(\"en_core_web_sm\")",
 | 
				
			||||||
 | 
					                "",
 | 
				
			||||||
 | 
					                "TRAIN_DATA = [",
 | 
				
			||||||
 | 
					                "    \"Apple is looking at buying U.K. startup for $1 billion\",",
 | 
				
			||||||
 | 
					                "    \"Microsoft acquires GitHub for $7.5 billion\"",
 | 
				
			||||||
 | 
					                "]",
 | 
				
			||||||
 | 
					                "docs = nlp.pipe(TRAIN_DATA)",
 | 
				
			||||||
 | 
					                "",
 | 
				
			||||||
 | 
					                "# Create a new DocBin",
 | 
				
			||||||
 | 
					                "doc_bin = DocBin(docs=docs)",
 | 
				
			||||||
 | 
					                "",
 | 
				
			||||||
 | 
					                "# Augment Data",
 | 
				
			||||||
 | 
					                "doc_bin = EntitySwapAugmenter(doc_bin).augment(4)",
 | 
				
			||||||
 | 
					                "for doc in doc_bin.get_docs(nlp.vocab):",
 | 
				
			||||||
 | 
					                "    print(doc.text)",
 | 
				
			||||||
 | 
					                "",
 | 
				
			||||||
 | 
					                "# Output",
 | 
				
			||||||
 | 
					                "#",
 | 
				
			||||||
 | 
					                "# GitHub is looking at buying U.K. startup for $ 7.5 billion",
 | 
				
			||||||
 | 
					                "# Microsoft is looking at buying U.K. startup for $ 1 billion",
 | 
				
			||||||
 | 
					                "# Microsoft is looking at buying U.K. startup for $ 7.5 billion",
 | 
				
			||||||
 | 
					                "# GitHub is looking at buying U.K. startup for $ 1 billion",
 | 
				
			||||||
 | 
					                "# Microsoft acquires Apple for $ 7.5 billion",
 | 
				
			||||||
 | 
					                "# Apple acquires Microsoft for $ 1 billion",
 | 
				
			||||||
 | 
					                "# Microsoft acquires Microsoft for $ 7.5 billion",
 | 
				
			||||||
 | 
					                "# GitHub acquires GitHub for $ 1 billion"
 | 
				
			||||||
 | 
					            ],
 | 
				
			||||||
 | 
					            "author": "David Berenstein",
 | 
				
			||||||
 | 
					            "author_links": {
 | 
				
			||||||
 | 
					                "github": "davidberenstein1957",
 | 
				
			||||||
 | 
					                "website": "https://www.linkedin.com/in/david-berenstein-1bab11105/"
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "category": ["standalone"],
 | 
				
			||||||
 | 
					            "tags": ["ner", "few-shot", "augmentation", "datasets", "training"],
 | 
				
			||||||
 | 
					            "spacy_version": 3
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            "id": "blackstone",
 | 
					            "id": "blackstone",
 | 
				
			||||||
            "title": "Blackstone",
 | 
					            "title": "Blackstone",
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user