mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Merge pull request #11074 from Schero1994/feature/remove
Batch #2 | spaCy universe cleanup
This commit is contained in:
commit
c7c3fb1d0c
|
@ -749,43 +749,6 @@
|
||||||
"category": ["standalone", "research"],
|
"category": ["standalone", "research"],
|
||||||
"tags": ["pytorch"]
|
"tags": ["pytorch"]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": "NeuroNER",
|
|
||||||
"title": "NeuroNER",
|
|
||||||
"slogan": "Named-entity recognition using neural networks",
|
|
||||||
"github": "Franck-Dernoncourt/NeuroNER",
|
|
||||||
"category": ["models"],
|
|
||||||
"pip": "pyneuroner[cpu]",
|
|
||||||
"code_example": [
|
|
||||||
"from neuroner import neuromodel",
|
|
||||||
"nn = neuromodel.NeuroNER(train_model=False, use_pretrained_model=True)"
|
|
||||||
],
|
|
||||||
"tags": ["standalone"]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "NLPre",
|
|
||||||
"title": "NLPre",
|
|
||||||
"slogan": "Natural Language Preprocessing Library for health data and more",
|
|
||||||
"github": "NIHOPA/NLPre",
|
|
||||||
"pip": "nlpre",
|
|
||||||
"code_example": [
|
|
||||||
"from nlpre import titlecaps, dedash, identify_parenthetical_phrases",
|
|
||||||
"from nlpre import replace_acronyms, replace_from_dictionary",
|
|
||||||
"ABBR = identify_parenthetical_phrases()(text)",
|
|
||||||
"parsers = [dedash(), titlecaps(), replace_acronyms(ABBR),",
|
|
||||||
" replace_from_dictionary(prefix='MeSH_')]",
|
|
||||||
"for f in parsers:",
|
|
||||||
" text = f(text)",
|
|
||||||
"print(text)"
|
|
||||||
],
|
|
||||||
"category": ["scientific", "biomedical"],
|
|
||||||
"author": "Travis Hoppe",
|
|
||||||
"author_links": {
|
|
||||||
"github": "thoppe",
|
|
||||||
"twitter": "metasemantic",
|
|
||||||
"website": "http://thoppe.github.io/"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": "Chatterbot",
|
"id": "Chatterbot",
|
||||||
"title": "Chatterbot",
|
"title": "Chatterbot",
|
||||||
|
@ -888,78 +851,6 @@
|
||||||
"github": "shigapov"
|
"github": "shigapov"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": "spacy_hunspell",
|
|
||||||
"slogan": "Add spellchecking and spelling suggestions to your spaCy pipeline using Hunspell",
|
|
||||||
"description": "This package uses the [spaCy 2.0 extensions](https://spacy.io/usage/processing-pipelines#extensions) to add [Hunspell](http://hunspell.github.io) support for spellchecking.",
|
|
||||||
"github": "tokestermw/spacy_hunspell",
|
|
||||||
"pip": "spacy_hunspell",
|
|
||||||
"code_example": [
|
|
||||||
"import spacy",
|
|
||||||
"from spacy_hunspell import spaCyHunSpell",
|
|
||||||
"",
|
|
||||||
"nlp = spacy.load('en_core_web_sm')",
|
|
||||||
"hunspell = spaCyHunSpell(nlp, 'mac')",
|
|
||||||
"nlp.add_pipe(hunspell)",
|
|
||||||
"doc = nlp('I can haz cheezeburger.')",
|
|
||||||
"haz = doc[2]",
|
|
||||||
"haz._.hunspell_spell # False",
|
|
||||||
"haz._.hunspell_suggest # ['ha', 'haze', 'hazy', 'has', 'hat', 'had', 'hag', 'ham', 'hap', 'hay', 'haw', 'ha z']"
|
|
||||||
],
|
|
||||||
"author": "Motoki Wu",
|
|
||||||
"author_links": {
|
|
||||||
"github": "tokestermw",
|
|
||||||
"twitter": "plusepsilon"
|
|
||||||
},
|
|
||||||
"category": ["pipeline"],
|
|
||||||
"tags": ["spellcheck"]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "spacy_grammar",
|
|
||||||
"slogan": "Language Tool style grammar handling with spaCy",
|
|
||||||
"description": "This packages leverages the [Matcher API](https://spacy.io/docs/usage/rule-based-matching) in spaCy to quickly match on spaCy tokens not dissimilar to regex. It reads a `grammar.yml` file to load up custom patterns and returns the results inside `Doc`, `Span`, and `Token`. It is extensible through adding rules to `grammar.yml` (though currently only the simple string matching is implemented).",
|
|
||||||
"github": "tokestermw/spacy_grammar",
|
|
||||||
"code_example": [
|
|
||||||
"import spacy",
|
|
||||||
"from spacy_grammar.grammar import Grammar",
|
|
||||||
"",
|
|
||||||
"nlp = spacy.load('en')",
|
|
||||||
"grammar = Grammar(nlp)",
|
|
||||||
"nlp.add_pipe(grammar)",
|
|
||||||
"doc = nlp('I can haz cheeseburger.')",
|
|
||||||
"doc._.has_grammar_error # True"
|
|
||||||
],
|
|
||||||
"author": "Motoki Wu",
|
|
||||||
"author_links": {
|
|
||||||
"github": "tokestermw",
|
|
||||||
"twitter": "plusepsilon"
|
|
||||||
},
|
|
||||||
"category": ["pipeline"]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "spacy_kenlm",
|
|
||||||
"slogan": "KenLM extension for spaCy 2.0",
|
|
||||||
"github": "tokestermw/spacy_kenlm",
|
|
||||||
"pip": "spacy_kenlm",
|
|
||||||
"code_example": [
|
|
||||||
"import spacy",
|
|
||||||
"from spacy_kenlm import spaCyKenLM",
|
|
||||||
"",
|
|
||||||
"nlp = spacy.load('en_core_web_sm')",
|
|
||||||
"spacy_kenlm = spaCyKenLM() # default model from test.arpa",
|
|
||||||
"nlp.add_pipe(spacy_kenlm)",
|
|
||||||
"doc = nlp('How are you?')",
|
|
||||||
"doc._.kenlm_score # doc score",
|
|
||||||
"doc[:2]._.kenlm_score # span score",
|
|
||||||
"doc[2]._.kenlm_score # token score"
|
|
||||||
],
|
|
||||||
"author": "Motoki Wu",
|
|
||||||
"author_links": {
|
|
||||||
"github": "tokestermw",
|
|
||||||
"twitter": "plusepsilon"
|
|
||||||
},
|
|
||||||
"category": ["pipeline"]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": "spacy_readability",
|
"id": "spacy_readability",
|
||||||
"slogan": "Add text readability meta data to Doc objects",
|
"slogan": "Add text readability meta data to Doc objects",
|
||||||
|
@ -1028,34 +919,6 @@
|
||||||
},
|
},
|
||||||
"category": ["pipeline"]
|
"category": ["pipeline"]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": "spacy-lookup",
|
|
||||||
"slogan": "A powerful entity matcher for very large dictionaries, using the FlashText module",
|
|
||||||
"description": "spaCy v2.0 extension and pipeline component for adding Named Entities metadata to `Doc` objects. Detects Named Entities using dictionaries. The extension sets the custom `Doc`, `Token` and `Span` attributes `._.is_entity`, `._.entity_type`, `._.has_entities` and `._.entities`. Named Entities are matched using the python module `flashtext`, and looked up in the data provided by different dictionaries.",
|
|
||||||
"github": "mpuig/spacy-lookup",
|
|
||||||
"pip": "spacy-lookup",
|
|
||||||
"code_example": [
|
|
||||||
"import spacy",
|
|
||||||
"from spacy_lookup import Entity",
|
|
||||||
"",
|
|
||||||
"nlp = spacy.load('en')",
|
|
||||||
"entity = Entity(keywords_list=['python', 'product manager', 'java platform'])",
|
|
||||||
"nlp.add_pipe(entity, last=True)",
|
|
||||||
"",
|
|
||||||
"doc = nlp(\"I am a product manager for a java and python.\")",
|
|
||||||
"assert doc._.has_entities == True",
|
|
||||||
"assert doc[0]._.is_entity == False",
|
|
||||||
"assert doc[3]._.entity_desc == 'product manager'",
|
|
||||||
"assert doc[3]._.is_entity == True",
|
|
||||||
"",
|
|
||||||
"print([(token.text, token._.canonical) for token in doc if token._.is_entity])"
|
|
||||||
],
|
|
||||||
"author": "Marc Puig",
|
|
||||||
"author_links": {
|
|
||||||
"github": "mpuig"
|
|
||||||
},
|
|
||||||
"category": ["pipeline"]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": "spacy-iwnlp",
|
"id": "spacy-iwnlp",
|
||||||
"slogan": "German lemmatization with IWNLP",
|
"slogan": "German lemmatization with IWNLP",
|
||||||
|
@ -1322,21 +1185,6 @@
|
||||||
"github": "huggingface"
|
"github": "huggingface"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": "spacy-vis",
|
|
||||||
"slogan": "A visualisation tool for spaCy using Hierplane",
|
|
||||||
"description": "A visualiser for spaCy annotations. This visualisation uses the [Hierplane](https://allenai.github.io/hierplane/) Library to render the dependency parse from spaCy's models. It also includes visualisation of entities and POS tags within nodes.",
|
|
||||||
"github": "DeNeutoy/spacy-vis",
|
|
||||||
"url": "http://spacyvis.allennlp.org/spacy-parser",
|
|
||||||
"thumb": "https://i.imgur.com/DAG9QFd.jpg",
|
|
||||||
"image": "https://raw.githubusercontent.com/DeNeutoy/spacy-vis/master/img/example.gif",
|
|
||||||
"author": "Mark Neumann",
|
|
||||||
"author_links": {
|
|
||||||
"twitter": "MarkNeumannnn",
|
|
||||||
"github": "DeNeutoy"
|
|
||||||
},
|
|
||||||
"category": ["visualizers"]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": "matcher-explorer",
|
"id": "matcher-explorer",
|
||||||
"title": "Rule-based Matcher Explorer",
|
"title": "Rule-based Matcher Explorer",
|
||||||
|
@ -2340,29 +2188,6 @@
|
||||||
"youtube": "8u57WSXVpmw",
|
"youtube": "8u57WSXVpmw",
|
||||||
"category": ["videos"]
|
"category": ["videos"]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": "adam_qas",
|
|
||||||
"title": "ADAM: Question Answering System",
|
|
||||||
"slogan": "A question answering system that extracts answers from Wikipedia to questions posed in natural language.",
|
|
||||||
"github": "5hirish/adam_qas",
|
|
||||||
"pip": "qas",
|
|
||||||
"code_example": [
|
|
||||||
"git clone https://github.com/5hirish/adam_qas.git",
|
|
||||||
"cd adam_qas",
|
|
||||||
"pip install -r requirements.txt",
|
|
||||||
"python -m qas.adam 'When was linux kernel version 4.0 released ?'"
|
|
||||||
],
|
|
||||||
"code_language": "bash",
|
|
||||||
"thumb": "https://shirishkadam.files.wordpress.com/2018/04/mini_alleviate.png",
|
|
||||||
"author": "Shirish Kadam",
|
|
||||||
"author_links": {
|
|
||||||
"twitter": "5hirish",
|
|
||||||
"github": "5hirish",
|
|
||||||
"website": "https://shirishkadam.com/"
|
|
||||||
},
|
|
||||||
"category": ["standalone"],
|
|
||||||
"tags": ["question-answering", "elasticsearch"]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": "self-attentive-parser",
|
"id": "self-attentive-parser",
|
||||||
"title": "Berkeley Neural Parser",
|
"title": "Berkeley Neural Parser",
|
||||||
|
@ -2460,20 +2285,6 @@
|
||||||
"category": ["nonpython"],
|
"category": ["nonpython"],
|
||||||
"tags": ["javascript"]
|
"tags": ["javascript"]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": "spacy-raspberry",
|
|
||||||
"title": "spacy-raspberry",
|
|
||||||
"slogan": "64bit Raspberry Pi image for spaCy and neuralcoref",
|
|
||||||
"github": "boehm-e/spacy-raspberry",
|
|
||||||
"thumb": "https://i.imgur.com/VCJMrE6.png",
|
|
||||||
"image": "https://raw.githubusercontent.com/boehm-e/spacy-raspberry/master/imgs/preview.png",
|
|
||||||
"author": "Erwan Boehm",
|
|
||||||
"author_links": {
|
|
||||||
"github": "boehm-e"
|
|
||||||
},
|
|
||||||
"category": ["apis"],
|
|
||||||
"tags": ["raspberrypi"]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": "spacy-wordnet",
|
"id": "spacy-wordnet",
|
||||||
"title": "spacy-wordnet",
|
"title": "spacy-wordnet",
|
||||||
|
@ -2544,35 +2355,6 @@
|
||||||
"category": ["standalone", "pipeline"],
|
"category": ["standalone", "pipeline"],
|
||||||
"tags": ["linguistics", "computational linguistics", "conll", "conll-u"]
|
"tags": ["linguistics", "computational linguistics", "conll", "conll-u"]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": "spacy-langdetect",
|
|
||||||
"title": "spacy-langdetect",
|
|
||||||
"slogan": "A fully customizable language detection pipeline for spaCy",
|
|
||||||
"description": "This module allows you to add language detection capabilites to your spaCy pipeline. Also supports custom language detectors!",
|
|
||||||
"pip": "spacy-langdetect",
|
|
||||||
"code_example": [
|
|
||||||
"import spacy",
|
|
||||||
"from spacy_langdetect import LanguageDetector",
|
|
||||||
"nlp = spacy.load('en')",
|
|
||||||
"nlp.add_pipe(LanguageDetector(), name='language_detector', last=True)",
|
|
||||||
"text = 'This is an english text.'",
|
|
||||||
"doc = nlp(text)",
|
|
||||||
"# document level language detection. Think of it like average language of the document!",
|
|
||||||
"print(doc._.language)",
|
|
||||||
"# sentence level language detection",
|
|
||||||
"for sent in doc.sents:",
|
|
||||||
" print(sent, sent._.language)"
|
|
||||||
],
|
|
||||||
"code_language": "python",
|
|
||||||
"author": "Abhijit Balaji",
|
|
||||||
"author_links": {
|
|
||||||
"github": "Abhijit-2592",
|
|
||||||
"website": "https://abhijit-2592.github.io/"
|
|
||||||
},
|
|
||||||
"github": "Abhijit-2592/spacy-langdetect",
|
|
||||||
"category": ["pipeline"],
|
|
||||||
"tags": ["language-detection"]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": "ludwig",
|
"id": "ludwig",
|
||||||
"title": "Ludwig",
|
"title": "Ludwig",
|
||||||
|
@ -3071,35 +2853,6 @@
|
||||||
],
|
],
|
||||||
"author": "Stefan Daniel Dumitrescu, Andrei-Marius Avram"
|
"author": "Stefan Daniel Dumitrescu, Andrei-Marius Avram"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": "num_fh",
|
|
||||||
"title": "Numeric Fused-Head",
|
|
||||||
"slogan": "Numeric Fused-Head Identificaiton and Resolution in English",
|
|
||||||
"description": "This package provide a wrapper for the Numeric Fused-Head in English. It provides another information layer on numbers that refer to another entity which is not obvious from the syntactic tree.",
|
|
||||||
"github": "yanaiela/num_fh",
|
|
||||||
"pip": "num_fh",
|
|
||||||
"category": ["pipeline", "research"],
|
|
||||||
"code_example": [
|
|
||||||
"import spacy",
|
|
||||||
"from num_fh import NFH",
|
|
||||||
"nlp = spacy.load('en_core_web_sm')",
|
|
||||||
"nfh = NFH(nlp)",
|
|
||||||
"nlp.add_pipe(nfh, first=False)",
|
|
||||||
"doc = nlp(\"I told you two, that only one of them is the one who will get 2 or 3 icecreams\")",
|
|
||||||
"",
|
|
||||||
"assert doc[16]._.is_nfh == True",
|
|
||||||
"assert doc[18]._.is_nfh == False",
|
|
||||||
"assert doc[3]._.is_deter_nfh == True",
|
|
||||||
"assert doc[16]._.is_deter_nfh == False",
|
|
||||||
"assert len(doc._.nfh) == 4"
|
|
||||||
],
|
|
||||||
"author": "Yanai Elazar",
|
|
||||||
"author_links": {
|
|
||||||
"github": "yanaiela",
|
|
||||||
"twitter": "yanaiela",
|
|
||||||
"website": "https://yanaiela.github.io"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": "Healthsea",
|
"id": "Healthsea",
|
||||||
"title": "Healthsea",
|
"title": "Healthsea",
|
||||||
|
|
Loading…
Reference in New Issue
Block a user