Batch #1 | spaCy universe cleanup (#10642)

* delete universe object: wmd-relax

* delete universe object: spaCy.jl

* delete universe object: saber

* delete universe object: languagecrunch

* delete universe object: gracyql

* delete universe object: ExcelCy

* delete universe object: EpiTator

Co-authored-by: schaeran <schaeran1994@gmail.com>
This commit is contained in:
Schero1994 2022-04-14 10:08:19 +02:00 committed by GitHub
parent 4228f3c757
commit caf8528af7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -591,23 +591,6 @@
"category": ["conversational", "standalone"], "category": ["conversational", "standalone"],
"tags": ["chatbots"] "tags": ["chatbots"]
}, },
{
"id": "saber",
"title": "saber",
"slogan": "Deep-learning based tool for information extraction in the biomedical domain",
"github": "BaderLab/saber",
"pip": "saber",
"thumb": "https://raw.githubusercontent.com/BaderLab/saber/master/docs/img/saber_logo.png",
"code_example": [
"from saber.saber import Saber",
"saber = Saber()",
"saber.load('PRGE')",
"saber.annotate('The phosphorylation of Hdm2 by MK2 promotes the ubiquitination of p53.')"
],
"author": "Bader Lab, University of Toronto",
"category": ["scientific"],
"tags": ["keras", "biomedical"]
},
{ {
"id": "alibi", "id": "alibi",
"title": "alibi", "title": "alibi",
@ -1076,29 +1059,6 @@
"category": ["pipeline"], "category": ["pipeline"],
"tags": ["pipeline", "readability", "syntactic complexity", "descriptive statistics"] "tags": ["pipeline", "readability", "syntactic complexity", "descriptive statistics"]
}, },
{
"id": "wmd-relax",
"slogan": "Calculates word mover's distance insanely fast",
"description": "Calculates Word Mover's Distance as described in [From Word Embeddings To Document Distances](http://www.cs.cornell.edu/~kilian/papers/wmd_metric.pdf) by Matt Kusner, Yu Sun, Nicholas Kolkin and Kilian Weinberger.\n\n⚠ **This package is currently only compatible with spaCy v1.x.**",
"github": "src-d/wmd-relax",
"thumb": "https://i.imgur.com/f91C3Lf.jpg",
"code_example": [
"import spacy",
"import wmd",
"",
"nlp = spacy.load('en', create_pipeline=wmd.WMD.create_spacy_pipeline)",
"doc1 = nlp(\"Politician speaks to the media in Illinois.\")",
"doc2 = nlp(\"The president greets the press in Chicago.\")",
"print(doc1.similarity(doc2))"
],
"author": "source{d}",
"author_links": {
"github": "src-d",
"twitter": "sourcedtech",
"website": "https://sourced.tech"
},
"category": ["pipeline"]
},
{ {
"id": "neuralcoref", "id": "neuralcoref",
"slogan": "State-of-the-art coreference resolution based on neural nets and spaCy", "slogan": "State-of-the-art coreference resolution based on neural nets and spaCy",
@ -1525,17 +1485,6 @@
}, },
"category": ["nonpython"] "category": ["nonpython"]
}, },
{
"id": "spaCy.jl",
"slogan": "Julia interface for spaCy (work in progress)",
"github": "jekbradbury/SpaCy.jl",
"author": "James Bradbury",
"author_links": {
"github": "jekbradbury",
"twitter": "jekbradbury"
},
"category": ["nonpython"]
},
{ {
"id": "ruby-spacy", "id": "ruby-spacy",
"title": "ruby-spacy", "title": "ruby-spacy",
@ -1605,21 +1554,6 @@
}, },
"category": ["apis"] "category": ["apis"]
}, },
{
"id": "languagecrunch",
"slogan": "NLP server for spaCy, WordNet and NeuralCoref as a Docker image",
"github": "artpar/languagecrunch",
"code_example": [
"docker run -it -p 8080:8080 artpar/languagecrunch",
"curl http://localhost:8080/nlp/parse?`echo -n \"The new twitter is so weird. Seriously. Why is there a new twitter? What was wrong with the old one? Fix it now.\" | python -c \"import urllib, sys; print(urllib.urlencode({'sentence': sys.stdin.read()}))\"`"
],
"code_language": "bash",
"author": "Parth Mudgal",
"author_links": {
"github": "artpar"
},
"category": ["apis"]
},
{ {
"id": "spacy-nlp", "id": "spacy-nlp",
"slogan": " Expose spaCy NLP text parsing to Node.js (and other languages) via Socket.IO", "slogan": " Expose spaCy NLP text parsing to Node.js (and other languages) via Socket.IO",
@ -2194,43 +2128,6 @@
"category": ["standalone"], "category": ["standalone"],
"tags": ["question-answering", "elasticsearch"] "tags": ["question-answering", "elasticsearch"]
}, },
{
"id": "epitator",
"title": "EpiTator",
"thumb": "https://i.imgur.com/NYFY1Km.jpg",
"slogan": "Extracts case counts, resolved location/species/disease names, date ranges and more",
"description": "EcoHealth Alliance uses EpiTator to catalog the what, where and when of infectious disease case counts reported in online news. Each of these aspects is extracted using independent annotators that can be applied to other domains. EpiTator organizes annotations by creating \"AnnoTiers\" for each type. AnnoTiers have methods for manipulating, combining and searching annotations. For instance, the `with_following_spans_from()` method can be used to create a new tier that combines a tier of one type (such as numbers), with another (say, kitchenware). The resulting tier will contain all the phrases in the document that match that pattern, like \"5 plates\" or \"2 cups.\"\n\nAnother commonly used method is `group_spans_by_containing_span()` which can be used to do things like find all the spaCy tokens in all the GeoNames a document mentions. spaCy tokens, named entities, sentences and noun chunks are exposed through the spaCy annotator which will create an AnnoTier for each. These are the basis of many of the other annotators. EpiTator also includes an annotator for extracting tables embedded in free text articles. Another neat feature is that the lexicons used for entity resolution are all stored in an embedded sqlite database so there is no need to run any external services in order to use EpiTator.",
"url": "https://github.com/ecohealthalliance/EpiTator",
"github": "ecohealthalliance/EpiTator",
"pip": "EpiTator",
"code_example": [
"from epitator.annotator import AnnoDoc",
"from epitator.geoname_annotator import GeonameAnnotator",
"",
"doc = AnnoDoc('Where is Chiang Mai?')",
"geoname_annotier = doc.require_tiers('geonames', via=GeonameAnnotator)",
"geoname = geoname_annotier.spans[0].metadata['geoname']",
"geoname['name']",
"# = 'Chiang Mai'",
"geoname['geonameid']",
"# = '1153671'",
"geoname['latitude']",
"# = 18.79038",
"geoname['longitude']",
"# = 98.98468",
"",
"from epitator.spacy_annotator import SpacyAnnotator",
"spacy_token_tier = doc.require_tiers('spacy.tokens', via=SpacyAnnotator)",
"list(geoname_annotier.group_spans_by_containing_span(spacy_token_tier))",
"# = [(AnnoSpan(9-19, Chiang Mai), [AnnoSpan(9-15, Chiang), AnnoSpan(16-19, Mai)])]"
],
"author": "EcoHealth Alliance",
"author_links": {
"github": "ecohealthalliance",
"website": "https://ecohealthalliance.org/"
},
"category": ["scientific", "standalone"]
},
{ {
"id": "self-attentive-parser", "id": "self-attentive-parser",
"title": "Berkeley Neural Parser", "title": "Berkeley Neural Parser",
@ -2259,30 +2156,6 @@
}, },
"category": ["research", "pipeline"] "category": ["research", "pipeline"]
}, },
{
"id": "excelcy",
"title": "ExcelCy",
"slogan": "Excel Integration with spaCy. Training NER using XLSX from PDF, DOCX, PPT, PNG or JPG.",
"description": "ExcelCy is a toolkit to integrate Excel to spaCy NLP training experiences. Training NER using XLSX from PDF, DOCX, PPT, PNG or JPG. ExcelCy has pipeline to match Entity with PhraseMatcher or Matcher in regular expression.",
"url": "https://github.com/kororo/excelcy",
"github": "kororo/excelcy",
"pip": "excelcy",
"code_example": [
"from excelcy import ExcelCy",
"# collect sentences, annotate Entities and train NER using spaCy",
"excelcy = ExcelCy.execute(file_path='https://github.com/kororo/excelcy/raw/master/tests/data/test_data_01.xlsx')",
"# use the nlp object as per spaCy API",
"doc = excelcy.nlp('Google rebrands its business apps')",
"# or save it for faster bootstrap for application",
"excelcy.nlp.to_disk('/model')"
],
"author": "Robertus Johansyah",
"author_links": {
"github": "kororo"
},
"category": ["training"],
"tags": ["excel"]
},
{ {
"id": "spacy-graphql", "id": "spacy-graphql",
"title": "spacy-graphql", "title": "spacy-graphql",
@ -2496,41 +2369,6 @@
}, },
"category": ["standalone", "conversational"] "category": ["standalone", "conversational"]
}, },
{
"id": "gracyql",
"title": "gracyql",
"slogan": "A thin GraphQL wrapper around spacy",
"github": "oterrier/gracyql",
"description": "An example of a basic [Starlette](https://github.com/encode/starlette) app using [Spacy](https://github.com/explosion/spaCy) and [Graphene](https://github.com/graphql-python/graphene). The main goal is to be able to use the amazing power of spaCy from other languages and retrieving only the information you need thanks to the GraphQL query definition. The GraphQL schema tries to mimic as much as possible the original Spacy API with classes Doc, Span and Token.",
"thumb": "https://i.imgur.com/xC7zpTO.png",
"category": ["apis"],
"tags": ["graphql"],
"code_example": [
"query ParserDisabledQuery {",
" nlp(model: \"en\", disable: [\"parser\", \"ner\"]) {",
" doc(text: \"I live in Grenoble, France\") {",
" text",
" tokens {",
" id",
" pos",
" lemma",
" dep",
" }",
" ents {",
" start",
" end",
" label",
" }",
" }",
" }",
"}"
],
"code_language": "json",
"author": "Olivier Terrier",
"author_links": {
"github": "oterrier"
}
},
{ {
"id": "pyInflect", "id": "pyInflect",
"slogan": "A Python module for word inflections", "slogan": "A Python module for word inflections",