spaCy/website/meta/universe.json

{
    "resources": [
        {
            "id": "spacy-streamlit",
            "title": "spacy-streamlit",
            "slogan": "spaCy building blocks for Streamlit apps",
            "github": "explosion/spacy-streamlit",
            "description": "This package contains utilities for visualizing spaCy models and building interactive spaCy-powered apps with [Streamlit](https://streamlit.io). It includes various building blocks you can use in your own Streamlit app, like visualizers for **syntactic dependencies**, **named entities**, **text classification**, **semantic similarity** via word vectors, token attributes, and more.",
            "pip": "spacy-streamlit",
            "category": ["visualizers"],
            "thumb": "https://i.imgur.com/mhEjluE.jpg",
            "image": "https://user-images.githubusercontent.com/13643239/85388081-f2da8700-b545-11ea-9bd4-e303d3c5763c.png",
            "code_example": [
                "import spacy_streamlit",
                "",
                "models = [\"en_core_web_sm\", \"en_core_web_md\"]",
                "default_text = \"Sundar Pichai is the CEO of Google.\"",
                "spacy_streamlit.visualize(models, default_text)"
            ],
            "author": "Ines Montani",
            "author_links": {
                "twitter": "_inesmontani",
                "github": "ines",
                "website": "https://ines.io"
            }
        },
        {
            "id": "spaczz",
            "title": "spaczz",
            "slogan": "Fuzzy matching and more for spaCy.",
            "description": "Spaczz provides fuzzy matching and multi-token regex matching functionality for spaCy. Spaczz's components have similar APIs to their spaCy counterparts and spaczz pipeline components can integrate into spaCy pipelines where they can be saved/loaded as models.",
            "github": "gandersen101/spaczz",
            "pip": "spaczz",
            "code_example": [
                "import spacy",
                "from spaczz.pipeline import SpaczzRuler",
                "",
                "nlp = spacy.blank('en')",
                "ruler = SpaczzRuler(nlp)",
                "ruler.add_patterns([{'label': 'PERSON', 'pattern': 'Bill Gates', 'type': 'fuzzy'}])",
                "nlp.add_pipe(ruler)",
                "",
                "doc = nlp('Oops, I spelled Bill Gatez wrong.')",
                "print([(ent.text, ent.start, ent.end, ent.label_) for ent in doc.ents])"
            ],
            "code_language": "python",
            "url": "https://spaczz.readthedocs.io/en/latest/",
            "author": "Grant Andersen",
            "author_links": {
                "twitter": "gandersen101",
                "github": "gandersen101"
            },
            "category": ["pipeline"],
            "tags": ["fuzzy-matching", "regex"]
        },
        {
            "id": "spacy-universal-sentence-encoder",
            "title": "SpaCy - Universal Sentence Encoder",
            "slogan": "Make use of Google's Universal Sentence Encoder directly within SpaCy",
            "description": "This library lets you use Universal Sentence Encoder embeddings of Docs, Spans and Tokens directly from TensorFlow Hub",
            "github": "MartinoMensio/spacy-universal-sentence-encoder-tfhub",
            "code_example": [
                "import spacy_universal_sentence_encoder",
                "# load one of the models: ['en_use_md', 'en_use_lg', 'xx_use_md', 'xx_use_lg']",
                "nlp = spacy_universal_sentence_encoder.load_model('en_use_lg')",
                "# get two documents",
                "doc_1 = nlp('Hi there, how are you?')",
                "doc_2 = nlp('Hello there, how are you doing today?')",
                "# use the similarity method that is based on the vectors, on Doc, Span or Token",
                "print(doc_1.similarity(doc_2[0:7]))"
            ],
            "category": ["models", "pipeline"],
            "author": "Martino Mensio",
            "author_links": {
                "twitter": "MartinoMensio",
                "github": "MartinoMensio",
                "website": "https://martinomensio.github.io"
            }
        },
        {
            "id": "whatlies",
            "title": "whatlies",
            "slogan": "Make interactive visualisations to figure out 'what lies' in word embeddings.",
            "description": "This small library offers tools to make visualisation easier of both word embeddings as well as operations on them. It has support for spaCy prebuilt models as a first class citizen but also offers support for sense2vec. There's a convenient API to perform linear algebra as well as support for popular transformations like PCA/UMAP/etc.",
            "github": "rasahq/whatlies",
            "pip": "whatlies",
            "thumb": "https://i.imgur.com/rOkOiLv.png",
            "image": "https://raw.githubusercontent.com/RasaHQ/whatlies/master/docs/gif-two.gif",
            "code_example": [
                "from whatlies import EmbeddingSet",
                "from whatlies.language import SpacyLanguage",
                "",
                "lang = SpacyLanguage('en_core_web_md')",
                "words = ['cat', 'dog', 'fish', 'kitten', 'man', 'woman', 'king', 'queen', 'doctor', 'nurse']",
                "",
                "emb = lang[words]",
                "emb.plot_interactive(x_axis='man', y_axis='woman')"
            ],
            "category": ["visualizers", "research"],
            "author": "Vincent D. Warmerdam",
            "author_links": {
                "twitter": "fishnets88",
                "github": "koaning",
                "website": "https://koaning.io"
            }
        },
        {
            "id": "spacy-stanza",
            "title": "spacy-stanza",
            "slogan": "Use the latest Stanza (StanfordNLP) research models directly in spaCy",
            "description": "This package wraps the Stanza (formerly StanfordNLP) library, so you can use Stanford's models as a spaCy pipeline. Using this wrapper, you'll be able to use the following annotations, computed by your pretrained `stanza` model:\n\n- Statistical tokenization (reflected in the `Doc` and its tokens)\n - Lemmatization (`token.lemma` and `token.lemma_`)\n - Part-of-speech tagging (`token.tag`, `token.tag_`, `token.pos`, `token.pos_`)\n - Dependency parsing (`token.dep`, `token.dep_`, `token.head`)\n - Named entity recognition (`doc.ents`, `token.ent_type`, `token.ent_type_`, `token.ent_iob`, `token.ent_iob_`)\n - Sentence segmentation (`doc.sents`)",
            "github": "explosion/spacy-stanza",
            "pip": "spacy-stanza",
            "thumb": "https://i.imgur.com/myhLjMJ.png",
            "code_example": [
                "import stanza",
                "from spacy_stanza import StanzaLanguage",
                "",
                "snlp = stanza.Pipeline(lang=\"en\")",
                "nlp = StanzaLanguage(snlp)",
                "",
                "doc = nlp(\"Barack Obama was born in Hawaii. He was elected president in 2008.\")",
                "for token in doc:",
                "    print(token.text, token.lemma_, token.pos_, token.dep_, token.ent_type_)",
                "print(doc.ents)"
            ],
            "category": ["pipeline", "standalone", "models", "research"],
            "author": "Explosion",
            "author_links": {
                "twitter": "explosion_ai",
                "github": "explosion",
                "website": "https://explosion.ai"
            }
        },
        {
            "id": "spacy-server",
            "title": "spaCy Server",
            "slogan": "\uD83E\uDD9C Containerized HTTP API for spaCy NLP",
            "description": "For developers who need programming language agnostic NLP, spaCy Server is a containerized HTTP API that provides industrial-strength natural language processing. Unlike other servers, our server is fast, idiomatic, and well documented.",
            "github": "neelkamath/spacy-server",
            "code_example": [
                "docker run --rm -dp 8080:8080 neelkamath/spacy-server",
                "curl http://localhost:8080/ner -H 'Content-Type: application/json' -d '{\"sections\": [\"My name is John Doe. I grew up in California.\"]}'"
            ],
            "code_language": "shell",
            "url": "https://hub.docker.com/r/neelkamath/spacy-server",
            "author": "Neel Kamath",
            "author_links": {
                "github": "neelkamath",
                "website": "https://neelkamath.com"
            },
            "category": ["apis"],
            "tags": ["docker"]
        },
        {
            "id": "nlp-architect",
            "title": "NLP Architect",
            "slogan": "Python lib for exploring Deep NLP & NLU by Intel AI",
            "github": "NervanaSystems/nlp-architect",
            "pip": "nlp-architect",
            "thumb": "https://i.imgur.com/vMideRx.png",
            "category": ["standalone", "research"],
            "tags": ["pytorch"]
        },
        {
            "id": "NeuroNER",
            "title": "NeuroNER",
            "slogan": "Named-entity recognition using neural networks",
            "github": "Franck-Dernoncourt/NeuroNER",
            "pip": "pyneuroner[cpu]",
            "code_example": [
                "from neuroner import neuromodel",
                "nn = neuromodel.NeuroNER(train_model=False, use_pretrained_model=True)"
            ],
            "category": ["ner"],
            "tags": ["standalone"]
        },
        {
            "id": "NLPre",
            "title": "NLPre",
            "slogan": "Natural Language Preprocessing Library for health data and more",
            "github": "NIHOPA/NLPre",
            "pip": "nlpre",
            "code_example": [
                "from nlpre import titlecaps, dedash, identify_parenthetical_phrases",
                "from nlpre import replace_acronyms, replace_from_dictionary",
                "ABBR = identify_parenthetical_phrases()(text)",
                "parsers = [dedash(), titlecaps(), replace_acronyms(ABBR),",
                "        replace_from_dictionary(prefix='MeSH_')]",
                "for f in parsers:",
                "    text = f(text)",
                "print(text)"
            ],
            "category": ["scientific", "biomedical"],
            "author": "Travis Hoppe",
            "author_links": {
                "github": "thoppe",
                "twitter": "metasemantic",
                "website": "http://thoppe.github.io/"
            }
        },
        {
            "id": "Chatterbot",
            "title": "Chatterbot",
            "slogan": "A machine-learning based conversational dialog engine for creating chat bots",
            "github": "gunthercox/ChatterBot",
            "pip": "chatterbot",
            "thumb": "https://i.imgur.com/eyAhwXk.jpg",
            "code_example": [
                "from chatterbot import ChatBot",
                "from chatterbot.trainers import ListTrainer",
                "# Create a new chat bot named Charlie",
                "chatbot = ChatBot('Charlie')",
                "trainer = ListTrainer(chatbot)",
                "trainer.train([",
                "'Hi, can I help you?',",
                "'Sure, I would like to book a flight to Iceland.',",
                "'Your flight has been booked.'",
                "])",
                "",
                "response = chatbot.get_response('I would like to book a flight.')"
            ],
            "author": "Gunther Cox",
            "author_links": {
                "github": "gunthercox"
            },
            "category": ["conversational", "standalone"],
            "tags": ["chatbots"]
        },
        {
            "id": "saber",
            "title": "saber",
            "slogan": "Deep-learning based tool for information extraction in the biomedical domain",
            "github": "BaderLab/saber",
            "pip": "saber",
            "thumb": "https://raw.githubusercontent.com/BaderLab/saber/master/docs/img/saber_logo.png",
            "code_example": [
                "from saber.saber import Saber",
                "saber = Saber()",
                "saber.load('PRGE')",
                "saber.annotate('The phosphorylation of Hdm2 by MK2 promotes the ubiquitination of p53.')"
            ],
            "author": "Bader Lab, University of Toronto",
            "category": ["scientific"],
            "tags": ["keras", "biomedical"]
        },
        {
            "id": "alibi",
            "title": "alibi",
            "slogan": "Algorithms for monitoring and explaining machine learning models",
            "github": "SeldonIO/alibi",
            "pip": "alibi",
            "thumb": "https://i.imgur.com/YkzQHRp.png",
            "code_example": [
                "from alibi.explainers import AnchorTabular",
                "explainer = AnchorTabular(predict_fn, feature_names)",
                "explainer.fit(X_train)",
                "explainer.explain(x)"
            ],
            "author": "Seldon",
            "category": ["standalone", "research"]
        },
        {
            "id": "spacymoji",
            "slogan": "Emoji handling and meta data as a spaCy pipeline component",
            "github": "ines/spacymoji",
            "description": "spaCy v2.0 extension and pipeline component for adding emoji meta data to `Doc` objects. Detects emoji consisting of one or more unicode characters, and can optionally merge multi-char emoji (combined pictures, emoji with skin tone modifiers) into one token. Human-readable emoji descriptions are added as a custom attribute, and an optional lookup table can be provided for your own descriptions. The extension sets the custom `Doc`, `Token` and `Span` attributes `._.is_emoji`, `._.emoji_desc`, `._.has_emoji` and `._.emoji`.",
            "pip": "spacymoji",
            "category": ["pipeline"],
            "tags": ["emoji", "unicode"],
            "thumb": "https://i.imgur.com/XOTYIgn.jpg",
            "code_example": [
                "import spacy",
                "from spacymoji import Emoji",
                "",
                "nlp = spacy.load('en')",
                "emoji = Emoji(nlp)",
                "nlp.add_pipe(emoji, first=True)",
                "",
                "doc = nlp('This is a test 😻 👍🏿')",
                "assert doc._.has_emoji == True",
                "assert doc[2:5]._.has_emoji == True",
                "assert doc[0]._.is_emoji == False",
                "assert doc[4]._.is_emoji == True",
                "assert doc[5]._.emoji_desc == 'thumbs up dark skin tone'",
                "assert len(doc._.emoji) == 2",
                "assert doc._.emoji[1] == ('👍🏿', 5, 'thumbs up dark skin tone')"
            ],
            "author": "Ines Montani",
            "author_links": {
                "twitter": "_inesmontani",
                "github": "ines",
                "website": "https://ines.io"
            }
        },
        {
            "id": "spacy_hunspell",
            "slogan": "Add spellchecking and spelling suggestions to your spaCy pipeline using Hunspell",
            "description": "This package uses the [spaCy 2.0 extensions](https://spacy.io/usage/processing-pipelines#extensions) to add [Hunspell](http://hunspell.github.io) support for spellchecking.",
            "github": "tokestermw/spacy_hunspell",
            "pip": "spacy_hunspell",
            "code_example": [
                "import spacy",
                "from spacy_hunspell import spaCyHunSpell",
                "",
                "nlp = spacy.load('en_core_web_sm')",
                "hunspell = spaCyHunSpell(nlp, 'mac')",
                "nlp.add_pipe(hunspell)",
                "doc = nlp('I can haz cheezeburger.')",
                "haz = doc[2]",
                "haz._.hunspell_spell  # False",
                "haz._.hunspell_suggest  # ['ha', 'haze', 'hazy', 'has', 'hat', 'had', 'hag', 'ham', 'hap', 'hay', 'haw', 'ha z']"
            ],
            "author": "Motoki Wu",
            "author_links": {
                "github": "tokestermw",
                "twitter": "plusepsilon"
            },
            "category": ["pipeline"],
            "tags": ["spellcheck"]
        },
        {
            "id": "spacy_grammar",
            "slogan": "Language Tool style grammar handling with spaCy",
            "description": "This package leverages the [Matcher API](https://spacy.io/docs/usage/rule-based-matching) in spaCy to quickly match on spaCy tokens not dissimilar to regex. It reads a `grammar.yml` file to load up custom patterns and returns the results inside `Doc`, `Span`, and `Token`. It is extensible through adding rules to `grammar.yml` (though currently only the simple string matching is implemented).",
            "github": "tokestermw/spacy_grammar",
            "code_example": [
                "import spacy",
                "from spacy_grammar.grammar import Grammar",
                "",
                "nlp = spacy.load('en')",
                "grammar = Grammar(nlp)",
                "nlp.add_pipe(grammar)",
                "doc = nlp('I can haz cheeseburger.')",
                "doc._.has_grammar_error  # True"
            ],
            "author": "Motoki Wu",
            "author_links": {
                "github": "tokestermw",
                "twitter": "plusepsilon"
            },
            "category": ["pipeline"]
        },
        {
            "id": "spacy_kenlm",
            "slogan": "KenLM extension for spaCy 2.0",
            "github": "tokestermw/spacy_kenlm",
            "pip": "spacy_kenlm",
            "code_example": [
                "import spacy",
                "from spacy_kenlm import spaCyKenLM",
                "",
                "nlp = spacy.load('en_core_web_sm')",
                "spacy_kenlm = spaCyKenLM()  # default model from test.arpa",
                "nlp.add_pipe(spacy_kenlm)",
                "doc = nlp('How are you?')",
                "doc._.kenlm_score # doc score",
                "doc[:2]._.kenlm_score # span score",
                "doc[2]._.kenlm_score # token score"
            ],
            "author": "Motoki Wu",
            "author_links": {
                "github": "tokestermw",
                "twitter": "plusepsilon"
            },
            "category": ["pipeline"]
        },
        {
            "id": "spacy_readability",
            "slogan": "Add text readability meta data to Doc objects",
            "description": "spaCy v2.0 pipeline component for calculating readability scores of text. Provides scores for Flesch-Kincaid grade level, Flesch-Kincaid reading ease, and Dale-Chall.",
            "github": "mholtzscher/spacy_readability",
            "pip": "spacy-readability",
            "code_example": [
                "import spacy",
                "from spacy_readability import Readability",
                "",
                "nlp = spacy.load('en')",
                "read = Readability(nlp)",
                "nlp.add_pipe(read, last=True)",
                "doc = nlp(\"I am some really difficult text to read because I use obnoxiously large words.\")",
                "doc._.flesch_kincaid_grade_level",
                "doc._.flesch_kincaid_reading_ease",
                "doc._.dale_chall"
            ],
            "author": "Michael Holtzscher",
            "author_links": {
                "github": "mholtzscher"
            },
            "category": ["pipeline"]
        },
        {
            "id": "spacy-sentence-segmenter",
            "title": "Sentence Segmenter",
            "slogan": "Custom sentence segmentation for spaCy",
            "code_example": [
                "from seg.newline.segmenter import NewLineSegmenter",
                "import spacy",
                "",
                "nlseg = NewLineSegmenter()",
                "nlp = spacy.load('en')",
                "nlp.add_pipe(nlseg.set_sent_starts, name='sentence_segmenter', before='parser')",
                "doc = nlp(my_doc_text)"
            ],
            "author": "tc64",
            "author_links": {
                "github": "tc64"
            },
            "category": ["pipeline"]
        },
        {
            "id": "spacy_cld",
            "title": "spaCy-CLD",
            "slogan": "Add language detection to your spaCy pipeline using CLD2",
            "description": "spaCy-CLD operates on `Doc` and `Span` spaCy objects. When called on a `Doc` or `Span`, the object is given two attributes: `languages` (a list of up to 3 language codes) and `language_scores` (a dictionary mapping language codes to confidence scores between 0 and 1).\n\nspacy-cld is a little extension that wraps the [PYCLD2](https://github.com/aboSamoor/pycld2) Python library, which in turn wraps the [Compact Language Detector 2](https://github.com/CLD2Owners/cld2) C library originally built at Google for the Chromium project. CLD2 uses character n-grams as features and a Naive Bayes classifier to identify 80+ languages from Unicode text strings (or XML/HTML). It can detect up to 3 different languages in a given document, and reports a confidence score with each language.",
            "github": "nickdavidhaynes/spacy-cld",
            "pip": "spacy_cld",
            "code_example": [
                "import spacy",
                "from spacy_cld import LanguageDetector",
                "",
                "nlp = spacy.load('en')",
                "language_detector = LanguageDetector()",
                "nlp.add_pipe(language_detector)",
                "doc = nlp('This is some English text.')",
                "",
                "doc._.languages  # ['en']",
                "doc._.language_scores['en']  # 0.96"
            ],
            "author": "Nicholas D Haynes",
            "author_links": {
                "github": "nickdavidhaynes"
            },
            "category": ["pipeline"]
        },
        {
            "id": "spacy-lookup",
            "slogan": "A powerful entity matcher for very large dictionaries, using the FlashText module",
            "description": "spaCy v2.0 extension and pipeline component for adding Named Entities metadata to `Doc` objects. Detects Named Entities using dictionaries. The extension sets the custom `Doc`, `Token` and `Span` attributes `._.is_entity`, `._.entity_type`, `._.has_entities` and `._.entities`. Named Entities are matched using the python module `flashtext`, and looked up in the data provided by different dictionaries.",
            "github": "mpuig/spacy-lookup",
            "pip": "spacy-lookup",
            "code_example": [
                "import spacy",
                "from spacy_lookup import Entity",
                "",
                "nlp = spacy.load('en')",
                "entity = Entity(keywords_list=['python', 'product manager', 'java platform'])",
                "nlp.add_pipe(entity, last=True)",
                "",
                "doc = nlp(\"I am a product manager for a java and python.\")",
                "assert doc._.has_entities == True",
                "assert doc[2:5]._.has_entities == True",
                "assert doc[0]._.is_entity == False",
                "assert doc[3]._.is_entity == True",
                "print(doc._.entities)"
            ],
            "author": "Marc Puig",
            "author_links": {
                "github": "mpuig"
            },
            "category": ["pipeline"]
        },
        {
            "id": "spacy-iwnlp",
            "slogan": "German lemmatization with IWNLP",
            "description": "This package uses the [spaCy 2.0 extensions](https://spacy.io/usage/processing-pipelines#extensions) to add [IWNLP-py](https://github.com/Liebeck/iwnlp-py) as German lemmatizer directly into your spaCy pipeline.",
            "github": "Liebeck/spacy-iwnlp",
            "pip": "spacy-iwnlp",
            "code_example": [
                "import spacy",
                "from spacy_iwnlp import spaCyIWNLP",
                "",
                "nlp = spacy.load('de')",
                "iwnlp = spaCyIWNLP(lemmatizer_path='data/IWNLP.Lemmatizer_20170501.json')",
                "nlp.add_pipe(iwnlp)",
                "doc = nlp('Wir mögen Fußballspiele mit ausgedehnten Verlängerungen.')",
                "for token in doc:",
                "    print('POS: {}\tIWNLP:{}'.format(token.pos_, token._.iwnlp_lemmas))"
            ],
            "author": "Matthias Liebeck",
            "author_links": {
                "github": "Liebeck"
            },
            "category": ["pipeline"],
            "tags": ["lemmatizer", "german"]
        },
        {
            "id": "spacy-sentiws",
            "slogan": "German sentiment scores with SentiWS",
            "description": "This package uses the [spaCy 2.0 extensions](https://spacy.io/usage/processing-pipelines#extensions) to add [SentiWS](http://wortschatz.uni-leipzig.de/en/download) as German sentiment score directly into your spaCy pipeline.",
            "github": "Liebeck/spacy-sentiws",
            "pip": "spacy-sentiws",
            "code_example": [
                "import spacy",
                "from spacy_sentiws import spaCySentiWS",
                "",
                "nlp = spacy.load('de')",
                "sentiws = spaCySentiWS(sentiws_path='data/sentiws/')",
                "nlp.add_pipe(sentiws)",
                "doc = nlp('Die Dummheit der Unterwerfung blüht in hübschen Farben.')",
                "",
                "for token in doc:",
                "    print('{}, {}, {}'.format(token.text, token._.sentiws, token.pos_))"
            ],
            "author": "Matthias Liebeck",
            "author_links": {
                "github": "Liebeck"
            },
            "category": ["pipeline"],
            "tags": ["sentiment", "german"]
        },
        {
            "id": "spacy-lefff",
            "slogan": "POS and French lemmatization with Lefff",
            "description": "spacy v2.0 extension and pipeline component for adding a French POS and lemmatizer based on [Lefff](https://hal.inria.fr/inria-00521242/).",
            "github": "sammous/spacy-lefff",
            "pip": "spacy-lefff",
            "code_example": [
                "import spacy",
                "from spacy_lefff import LefffLemmatizer, POSTagger",
                "",
                "nlp = spacy.load('fr')",
                "pos = POSTagger()",
                "french_lemmatizer = LefffLemmatizer(after_melt=True)",
                "nlp.add_pipe(pos, name='pos', after='parser')",
                "nlp.add_pipe(french_lemmatizer, name='lefff', after='pos')",
                "doc = nlp(u\"Paris est une ville très chère.\")",
                "for d in doc:",
                "    print(d.text, d.pos_, d._.melt_tagger, d._.lefff_lemma, d.tag_, d.lemma_)"
            ],
            "author": "Sami Moustachir",
            "author_links": {
                "github": "sammous"
            },
            "category": ["pipeline"],
            "tags": ["pos", "lemmatizer", "french"]
        },
        {
            "id": "lemmy",
            "title": "Lemmy",
            "slogan": "A Danish lemmatizer",
            "description": "Lemmy is a lemmatizer for Danish 🇩🇰 . It comes already trained on Dansk Sprognævns (DSN) word list (‘fuldformliste’) and the Danish Universal Dependencies and is ready for use. Lemmy also supports training on your own dataset. The model currently included in Lemmy was evaluated on the Danish Universal Dependencies dev dataset and scored an accuracy > 99%.\n\nYou can use Lemmy as a spaCy extension, more specifically a spaCy pipeline component. This is highly recommended and makes the lemmas easily accessible from the spaCy tokens. Lemmy makes use of POS tags to predict the lemmas. When wired up to the spaCy pipeline, Lemmy has the benefit of using spaCy’s builtin POS tagger.",
            "github": "sorenlind/lemmy",
            "pip": "lemmy",
            "code_example": [
                "import da_custom_model as da # name of your spaCy model",
                "import lemmy.pipe",
                "nlp = da.load()",
                "",
                "# create an instance of Lemmy's pipeline component for spaCy",
                "pipe = lemmy.pipe.load()",
                "",
                "# add the component to the spaCy pipeline.",
                "nlp.add_pipe(pipe, after='tagger')",
                "",
                "# lemmas can now be accessed using the `._.lemma` attribute on the tokens",
                "nlp(\"akvariernes\")[0]._.lemma"
            ],
            "thumb": "https://i.imgur.com/RJVFRWm.jpg",
            "author": "Søren Lind Kristiansen",
            "author_links": {
                "github": "sorenlind"
            },
            "category": ["pipeline"],
            "tags": ["lemmatizer", "danish"]
        },
        {
            "id": "wmd-relax",
            "slogan": "Calculates word mover's distance insanely fast",
            "description": "Calculates Word Mover's Distance as described in [From Word Embeddings To Document Distances](http://www.cs.cornell.edu/~kilian/papers/wmd_metric.pdf) by Matt Kusner, Yu Sun, Nicholas Kolkin and Kilian Weinberger.\n\n⚠️ **This package is currently only compatible with spaCy v1.x.**",
            "github": "src-d/wmd-relax",
            "thumb": "https://i.imgur.com/f91C3Lf.jpg",
            "code_example": [
                "import spacy",
                "import wmd",
                "",
                "nlp = spacy.load('en', create_pipeline=wmd.WMD.create_spacy_pipeline)",
                "doc1 = nlp(\"Politician speaks to the media in Illinois.\")",
                "doc2 = nlp(\"The president greets the press in Chicago.\")",
                "print(doc1.similarity(doc2))"
            ],
            "author": "source{d}",
            "author_links": {
                "github": "src-d",
                "twitter": "sourcedtech",
                "website": "https://sourced.tech"
            },
            "category": ["pipeline"]
        },
        {
            "id": "neuralcoref",
            "slogan": "State-of-the-art coreference resolution based on neural nets and spaCy",
            "description": "This coreference resolution module is based on the super fast [spaCy](https://spacy.io/) parser and uses the neural net scoring model described in [Deep Reinforcement Learning for Mention-Ranking Coreference Models](http://cs.stanford.edu/people/kevclark/resources/clark-manning-emnlp2016-deep.pdf) by Kevin Clark and Christopher D. Manning, EMNLP 2016. Since ✨Neuralcoref v2.0, you can train the coreference resolution system on your own dataset — e.g., another language than English! — **provided you have an annotated dataset**. Note that to use neuralcoref with spaCy > 2.1.0, you'll have to install neuralcoref from source.",
            "github": "huggingface/neuralcoref",
            "thumb": "https://i.imgur.com/j6FO9O6.jpg",
            "code_example": [
                "import spacy",
                "import neuralcoref",
                "",
                "nlp = spacy.load('en')",
                "neuralcoref.add_to_pipe(nlp)",
                "doc1 = nlp('My sister has a dog. She loves him.')",
                "print(doc1._.coref_clusters)",
                "",
                "doc2 = nlp('Angela lives in Boston. She is quite happy in that city.')",
                "for ent in doc2.ents:",
                "    print(ent._.coref_cluster)"
            ],
            "author": "Hugging Face",
            "author_links": {
                "github": "huggingface"
            },
            "category": ["standalone", "conversational", "models"],
            "tags": ["coref"]
        },
        {
            "id": "neuralcoref-vizualizer",
            "title": "Neuralcoref Visualizer",
            "slogan": "State-of-the-art coreference resolution based on neural nets and spaCy",
            "description": "In short, coreference is the fact that two or more expressions in a text – like pronouns or nouns – link to the same person or thing. It is a classical Natural language processing task, that has seen a revival of interest in the past two years as several research groups applied cutting-edge deep-learning and reinforcement-learning techniques to it. It is also one of the key building blocks to building conversational Artificial intelligences.",
            "url": "https://huggingface.co/coref/",
            "image": "https://i.imgur.com/3yy4Qyf.png",
            "thumb": "https://i.imgur.com/j6FO9O6.jpg",
            "github": "huggingface/neuralcoref",
            "category": ["visualizers", "conversational"],
            "tags": ["coref", "chatbots"],
            "author": "Hugging Face",
            "author_links": {
                "github": "huggingface"
            }
        },
        {
            "id": "spacy-vis",
            "slogan": "A visualisation tool for spaCy using Hierplane",
            "description": "A visualiser for spaCy annotations. This visualisation uses the [Hierplane](https://allenai.github.io/hierplane/) Library to render the dependency parse from spaCy's models. It also includes visualisation of entities and POS tags within nodes.",
            "github": "DeNeutoy/spacy-vis",
            "url": "http://spacyvis.allennlp.org/spacy-parser",
            "thumb": "https://i.imgur.com/DAG9QFd.jpg",
            "image": "https://raw.githubusercontent.com/DeNeutoy/spacy-vis/master/img/example.gif",
            "author": "Mark Neumann",
            "author_links": {
                "twitter": "MarkNeumannnn",
                "github": "DeNeutoy"
            },
            "category": ["visualizers"]
        },
        {
            "id": "matcher-explorer",
            "title": "Rule-based Matcher Explorer",
            "slogan": "Test spaCy's rule-based Matcher by creating token patterns interactively",
            "description": "Test spaCy's rule-based `Matcher` by creating token patterns interactively and running them over your text. Each token can set multiple attributes like text value, part-of-speech tag or boolean flags. The token-based view lets you explore how spaCy processes your text – and why your pattern matches, or why it doesn't. For more details on rule-based matching, see the [documentation](https://spacy.io/usage/rule-based-matching).",
            "image": "https://explosion.ai/assets/img/demos/matcher.png",
            "thumb": "https://i.imgur.com/rPK4AGt.jpg",
            "url": "https://explosion.ai/demos/matcher",
            "author": "Ines Montani",
            "author_links": {
                "twitter": "_inesmontani",
                "github": "ines",
                "website": "https://ines.io"
            },
            "category": ["visualizers"]
        },
        {
            "id": "displacy",
            "title": "displaCy",
            "slogan": "A modern syntactic dependency visualizer",
            "description": "Visualize spaCy's guess at the syntactic structure of a sentence. Arrows point from children to heads, and are labelled by their relation type.",
            "url": "https://explosion.ai/demos/displacy",
            "thumb": "https://i.imgur.com/nxDcHaL.jpg",
            "image": "https://explosion.ai/assets/img/demos/displacy.png",
            "author": "Ines Montani",
            "author_links": {
                "twitter": "_inesmontani",
                "github": "ines",
                "website": "https://ines.io"
            },
            "category": ["visualizers"]
        },
        {
            "id": "displacy-ent",
            "title": "displaCy ENT",
            "slogan": "A modern named entity visualizer",
            "description": "Visualize spaCy's guess at the named entities in the document. You can filter the displayed types, to only show the annotations you're interested in.",
            "url": "https://explosion.ai/demos/displacy-ent",
            "thumb": "https://i.imgur.com/A77Ecbs.jpg",
            "image": "https://explosion.ai/assets/img/demos/displacy-ent.png",
            "author": "Ines Montani",
            "author_links": {
                "twitter": "_inesmontani",
                "github": "ines",
                "website": "https://ines.io"
            },
            "category": ["visualizers"]
        },
        {
            "id": "explacy",
            "slogan": "A small tool that explains spaCy parse results",
            "github": "tylerneylon/explacy",
            "thumb": "https://i.imgur.com/V1hCWmn.jpg",
            "image": "https://raw.githubusercontent.com/tylerneylon/explacy/master/img/screenshot.png",
            "code_example": [
                "import spacy",
                "import explacy",
                "",
                "nlp = spacy.load('en')",
                "explacy.print_parse_info(nlp, 'The salad was surprisingly tasty.')"
            ],
            "author": "Tyler Neylon",
            "author_links": {
                "github": "tylerneylon"
            },
            "category": ["visualizers"]
        },
        {
            "id": "scattertext",
            "slogan": "Beautiful visualizations of how language differs among document types",
            "description": "A tool for finding distinguishing terms in small-to-medium-sized corpora, and presenting them in a sexy, interactive scatter plot with non-overlapping term labels. Exploratory data analysis just got more fun.",
            "github": "JasonKessler/scattertext",
            "image": "https://jasonkessler.github.io/2012conventions0.0.2.2.png",
            "code_example": [
                "import spacy",
                "import scattertext as st",
                "",
                "nlp = spacy.load('en')",
                "corpus = st.CorpusFromPandas(convention_df,",
                "                             category_col='party',",
                "                             text_col='text',",
                "                             nlp=nlp).build()"
            ],
            "author": "Jason Kessler",
            "author_links": {
                "github": "JasonKessler",
                "twitter": "jasonkessler"
            },
            "category": ["visualizers"]
        },
        {
            "id": "rasa",
            "title": "Rasa",
            "slogan": "Turn natural language into structured data",
            "description": "Machine learning tools for developers to build, improve, and deploy contextual chatbots and assistants. Powered by open source.",
            "github": "RasaHQ/rasa",
            "pip": "rasa",
            "thumb": "https://i.imgur.com/TyZnpwL.png",
            "url": "https://rasa.com/",
            "author": "Rasa",
            "author_links": {
                "github": "RasaHQ"
            },
            "category": ["conversational"],
            "tags": ["chatbots"]
        },
        {
            "id": "torchtext",
            "title": "torchtext",
            "slogan": "Data loaders and abstractions for text and NLP",
            "github": "pytorch/text",
            "pip": "torchtext",
            "thumb": "https://i.imgur.com/WFkxuPo.png",
            "code_example": [
                ">>> pos = data.TabularDataset(",
                "...    path='data/pos/pos_wsj_train.tsv', format='tsv',",
                "...    fields=[('text', data.Field()),",
                "...            ('labels', data.Field())])",
                "...",
                ">>> sentiment = data.TabularDataset(",
                "...    path='data/sentiment/train.json', format='json',",
                "...    fields={'sentence_tokenized': ('text', data.Field(sequential=True)),",
                "...            'sentiment_gold': ('labels', data.Field(sequential=False))})"
            ],
            "category": ["standalone", "research"],
            "tags": ["pytorch"]
        },
        {
            "id": "allennlp",
            "title": "AllenNLP",
            "slogan": "An open-source NLP research library, built on PyTorch and spaCy",
            "description": "AllenNLP is a new library designed to accelerate NLP research, by providing a framework that supports modern deep learning workflows for cutting-edge language understanding problems. AllenNLP uses spaCy as a preprocessing component. You can also use Allen NLP to develop spaCy pipeline components, to add annotations to the `Doc` object.",
            "github": "allenai/allennlp",
            "pip": "allennlp",
            "thumb": "https://i.imgur.com/U8opuDN.jpg",
            "url": "http://allennlp.org",
            "author": " Allen Institute for Artificial Intelligence",
            "author_links": {
                "github": "allenai",
                "twitter": "allenai_org",
                "website": "http://allenai.org"
            },
            "category": ["standalone", "research"]
        },
        {
            "id": "scispacy",
            "title": "scispaCy",
            "slogan": "A full spaCy pipeline and models for scientific/biomedical documents",
            "github": "allenai/scispacy",
            "pip": "scispacy",
            "thumb": "https://i.imgur.com/dJQSclW.png",
            "url": "https://allenai.github.io/scispacy/",
            "author": " Allen Institute for Artificial Intelligence",
            "author_links": {
                "github": "allenai",
                "twitter": "allenai_org",
                "website": "http://allenai.org"
            },
            "category": ["scientific", "models", "research"]
        },
        {
            "id": "textacy",
            "slogan": "NLP, before and after spaCy",
            "description": "`textacy` is a Python library for performing a variety of natural language processing (NLP) tasks, built on the high-performance `spacy` library. With the fundamentals – tokenization, part-of-speech tagging, dependency parsing, etc. – delegated to another library, `textacy` focuses on the tasks that come before and follow after.",
            "github": "chartbeat-labs/textacy",
            "pip": "textacy",
            "url": "https://chartbeat-labs.github.io/textacy/",
            "author": "Burton DeWilde",
            "author_links": {
                "github": "bdewilde",
                "twitter": "bjdewilde"
            },
            "category": ["standalone"]
        },
        {
            "id": "textpipe",
            "slogan": "clean and extract metadata from text",
            "description": "`textpipe` is a Python package for converting raw text in to clean, readable text and extracting metadata from that text. Its functionalities include transforming raw text into readable text by removing HTML tags and extracting metadata such as the number of words and named entities from the text.",
            "github": "textpipe/textpipe",
            "pip": "textpipe",
            "author": "Textpipe Contributors",
            "author_links": {
                "github": "textpipe",
                "website": "https://github.com/textpipe/textpipe/blob/master/CONTRIBUTORS.md"
            },
            "category": ["standalone"],
            "tags": ["text-processing", "named-entity-recognition"],
            "thumb": "https://avatars0.githubusercontent.com/u/40492530",
            "code_example": [
                "from textpipe import doc, pipeline",
                "sample_text = 'Sample text! <!DOCTYPE>'",
                "document = doc.Doc(sample_text)",
                "print(document.clean)",
                "'Sample text!'",
                "print(document.language)",
                "# 'en'",
                "print(document.nwords)",
                "# 2",
                "",
                "pipe = pipeline.Pipeline(['CleanText', 'NWords'])",
                "print(pipe(sample_text))",
                "# {'CleanText': 'Sample text!', 'NWords': 2}"
            ]
        },
        {
            "id": "mordecai",
            "slogan": "Full text geoparsing using spaCy, Geonames and Keras",
            "description": "Extract the place names from a piece of text, resolve them to the correct place, and return their coordinates and structured geographic information.",
            "github": "openeventdata/mordecai",
            "pip": "mordecai",
            "thumb": "https://i.imgur.com/gPJ9upa.jpg",
            "code_example": [
                "from mordecai import Geoparser",
                "geo = Geoparser()",
                "geo.geoparse(\"I traveled from Oxford to Ottawa.\")"
            ],
            "author": "Andy Halterman",
            "author_links": {
                "github": "ahalterman",
                "twitter": "ahalterman"
            },
            "category": ["standalone", "scientific"]
        },
        {
            "id": "kindred",
            "title": "Kindred",
            "slogan": "Biomedical relation extraction using spaCy",
            "description": "Kindred is a package for relation extraction in biomedical texts. Given some training data, it can build a model to identify relations between entities (e.g. drugs, genes, etc) in a sentence.",
            "github": "jakelever/kindred",
            "pip": "kindred",
            "code_example": [
                "import kindred",
                "",
                "trainCorpus = kindred.bionlpst.load('2016-BB3-event-train')",
                "devCorpus = kindred.bionlpst.load('2016-BB3-event-dev')",
                "predictionCorpus = devCorpus.clone()",
                "predictionCorpus.removeRelations()",
                "classifier = kindred.RelationClassifier()",
                "classifier.train(trainCorpus)",
                "classifier.predict(predictionCorpus)",
                "f1score = kindred.evaluate(devCorpus, predictionCorpus, metric='f1score')"
            ],
            "author": "Jake Lever",
            "author_links": {
                "github": "jakelever"
            },
            "category": ["standalone", "scientific"]
        },
        {
            "id": "sense2vec",
            "slogan": "Use NLP to go beyond vanilla word2vec",
            "description": "sense2vec ([Trask et. al](https://arxiv.org/abs/1511.06388), 2015) is a nice twist on [word2vec](https://en.wikipedia.org/wiki/Word2vec) that lets you learn more interesting, detailed and context-sensitive word vectors. For an interactive example of the technology, see our [sense2vec demo](https://explosion.ai/demos/sense2vec) that lets you explore semantic similarities across all Reddit comments of 2015.",
            "github": "explosion/sense2vec",
            "pip": "sense2vec==1.0.0a1",
            "thumb": "https://i.imgur.com/awfdhX6.jpg",
            "image": "https://explosion.ai/assets/img/demos/sense2vec.png",
            "url": "https://explosion.ai/demos/sense2vec",
            "code_example": [
                "import spacy",
                "from sense2vec import Sense2VecComponent",
                "",
                "nlp = spacy.load('en')",
                "s2v = Sense2VecComponent('/path/to/reddit_vectors-1.1.0')",
                "nlp.add_pipe(s2v)",
                "",
                "doc = nlp(\"A sentence about natural language processing.\")",
                "assert doc[3].text == 'natural language processing'",
                "freq = doc[3]._.s2v_freq",
                "vector = doc[3]._.s2v_vec",
                "most_similar = doc[3]._.s2v_most_similar(3)",
                "# [(('natural language processing', 'NOUN'), 1.0),",
                "#  (('machine learning', 'NOUN'), 0.8986966609954834),",
                "#  (('computer vision', 'NOUN'), 0.8636297583580017)]"
            ],
            "category": ["pipeline", "standalone", "visualizers"],
            "tags": ["vectors"],
            "author": "Explosion",
            "author_links": {
                "twitter": "explosion_ai",
                "github": "explosion",
                "website": "https://explosion.ai"
            }
        },
        {
            "id": "spacyr",
            "slogan": "An R wrapper for spaCy",
            "github": "quanteda/spacyr",
            "cran": "spacyr",
            "code_example": [
                "library(\"spacyr\")",
                "spacy_initialize()",
                "",
                "txt <- c(d1 = \"spaCy excels at large-scale information extraction tasks.\",",
                "         d2 = \"Mr. Smith goes to North Carolina.\")",
                "",
                "# process documents and obtain a data.table",
                "parsedtxt <- spacy_parse(txt)"
            ],
            "code_language": "r",
            "author": "Kenneth Benoit & Aki Matsuo",
            "category": ["nonpython"]
        },
        {
            "id": "cleannlp",
            "title": "CleanNLP",
            "slogan": "A tidy data model for NLP in R",
            "description": "The cleanNLP package is designed to make it as painless as possible to turn raw text into feature-rich data frames. the package offers four backends that can be used for parsing text: `tokenizers`, `udpipe`, `spacy` and `corenlp`.",
            "github": "statsmaths/cleanNLP",
            "cran": "cleanNLP",
            "author": "Taylor B. Arnold",
            "author_links": {
                "github": "statsmaths"
            },
            "category": ["nonpython"]
        },
        {
            "id": "spacy-cpp",
            "slogan": "C++ wrapper library for spaCy",
            "description": "The goal of spacy-cpp is to expose the functionality of spaCy to C++ applications, and to provide an API that is similar to that of spaCy, enabling rapid development in Python and simple porting to C++.",
            "github": "d99kris/spacy-cpp",
            "code_example": [
                "Spacy::Spacy spacy;",
                "auto nlp = spacy.load(\"en_core_web_sm\");",
                "auto doc = nlp.parse(\"This is a sentence.\");",
                "for (auto& token : doc.tokens())",
                "    std::cout << token.text() << \" [\" << token.pos_() << \"]\\n\";"
            ],
            "code_language": "cpp",
            "author": "Kristofer Berggren",
            "author_links": {
                "github": "d99kris"
            },
            "category": ["nonpython"]
        },
        {
            "id": "spaCy.jl",
            "slogan": "Julia interface for spaCy (work in progress)",
            "github": "jekbradbury/SpaCy.jl",
            "author": "James Bradbury",
            "author_links": {
                "github": "jekbradbury",
                "twitter": "jekbradbury"
            },
            "category": ["nonpython"]
        },
        {
            "id": "spacy_api",
            "slogan": "Server/client to load models in a separate, dedicated process",
            "github": "kootenpv/spacy_api",
            "pip": "spacy_api",
            "code_example": [
                "from spacy_api import Client",
                "",
                "spacy_client = Client() # default args host/port",
                "doc = spacy_client.single(\"How are you\")"
            ],
            "author": "Pascal van Kooten",
            "author_links": {
                "github": "kootenpv"
            },
            "category": ["apis"]
        },
        {
            "id": "spacy-api-docker",
            "slogan": "spaCy REST API, wrapped in a Docker container",
            "github": "jgontrum/spacy-api-docker",
            "url": "https://hub.docker.com/r/jgontrum/spacyapi/",
            "thumb": "https://i.imgur.com/NRnDKyj.jpg",
            "code_example": [
                "version: '2'",
                "",
                "services:",
                "  spacyapi:",
                "    image: jgontrum/spacyapi:en_v2",
                "    ports:",
                "      - \"127.0.0.1:8080:80\"",
                "    restart: always"
            ],
            "code_language": "docker",
            "author": "Johannes Gontrum",
            "author_links": {
                "github": "jgontrum"
            },
            "category": ["apis"]
        },
        {
            "id": "languagecrunch",
            "slogan": "NLP server for spaCy, WordNet and NeuralCoref as a Docker image",
            "github": "artpar/languagecrunch",
            "code_example": [
                "docker run -it -p 8080:8080 artpar/languagecrunch",
                "curl http://localhost:8080/nlp/parse?`echo -n \"The new twitter is so weird. Seriously. Why is there a new twitter? What was wrong with the old one? Fix it now.\" | python -c \"import urllib, sys; print(urllib.urlencode({'sentence': sys.stdin.read()}))\"`"
            ],
            "code_language": "bash",
            "author": "Parth Mudgal",
            "author_links": {
                "github": "artpar"
            },
            "category": ["apis"]
        },
        {
            "id": "spacy-nlp",
            "slogan": " Expose spaCy NLP text parsing to Node.js (and other languages) via Socket.IO",
            "github": "kengz/spacy-nlp",
            "thumb": "https://i.imgur.com/w41VSr7.jpg",
            "code_example": [
                "const spacyNLP = require(\"spacy-nlp\")",
                "// default port 6466",
                "// start the server with the python client that exposes spacyIO (or use an existing socketIO server at IOPORT)",
                "var serverPromise = spacyNLP.server({ port: process.env.IOPORT });",
                "// Loading spacy may take up to 15s"
            ],
            "code_language": "javascript",
            "author": "Wah Loon Keng",
            "author_links": {
                "github": "kengz"
            },
            "category": ["apis", "nonpython"]
        },
        {
            "id": "prodigy",
            "title": "Prodigy",
            "slogan": "Radically efficient machine teaching, powered by active learning",
            "description": "Prodigy is an annotation tool so efficient that data scientists can do the annotation themselves, enabling a new level of rapid iteration. Whether you're working on entity recognition, intent detection or image classification, Prodigy can help you train and evaluate your models faster. Stream in your own examples or real-world data from live APIs, update your model in real-time and chain models together to build more complex systems.",
            "thumb": "https://i.imgur.com/UVRtP6g.jpg",
            "image": "https://i.imgur.com/Dt5vrY6.png",
            "url": "https://prodi.gy",
            "code_example": [
                "prodigy dataset ner_product \"Improve PRODUCT on Reddit data\"",
                "✨ Created dataset 'ner_product'.",
                "",
                "prodigy ner.teach ner_product en_core_web_sm ~/data.jsonl --label PRODUCT",
                "✨ Starting the web server on port 8080..."
            ],
            "code_language": "bash",
            "category": ["standalone", "training"],
            "author": "Explosion",
            "author_links": {
                "twitter": "explosion_ai",
                "github": "explosion",
                "website": "https://explosion.ai"
            }
        },
        {
            "id": "dragonfire",
            "title": "Dragonfire",
            "slogan": "An open-source virtual assistant for Ubuntu based Linux distributions",
            "github": "DragonComputer/Dragonfire",
            "thumb": "https://i.imgur.com/5fqguKS.jpg",
            "image": "https://raw.githubusercontent.com/DragonComputer/Dragonfire/master/docs/img/demo.gif",
            "author": "Dragon Computer",
            "author_links": {
                "github": "DragonComputer",
                "website": "http://dragon.computer"
            },
            "category": ["standalone"]
        },
        {
            "id": "prefect",
            "title": "Prefect",
            "slogan": "Workflow management system designed for modern infrastructure",
            "github": "PrefectHQ/prefect",
            "pip": "prefect",
            "thumb": "https://i.imgur.com/oLTwr0e.png",
            "code_example": [
                "from prefect import Flow",
                "from prefect.tasks.spacy.spacy_tasks import SpacyNLP",
                "import spacy",
                "",
                "nlp = spacy.load(\"en_core_web_sm\")",
                "",
                "with Flow(\"Natural Language Processing\") as flow:",
                "    doc = SpacyNLP(text=\"This is some text\", nlp=nlp)",
                "",
                "flow.run()"
            ],
            "author": "Prefect",
            "author_links": {
                "website": "https://prefect.io"
            },
            "category": ["standalone"]
        },
        {
            "id": "graphbrain",
            "title": "Graphbrain",
            "slogan": "Automated meaning extraction and text understanding",
            "description": "Graphbrain is an Artificial Intelligence open-source software library and scientific research tool. Its aim is to facilitate automated meaning extraction and text understanding, as well as the exploration and inference of knowledge.",
            "github": "graphbrain/graphbrain",
            "pip": "graphbrain",
            "thumb": "https://i.imgur.com/cct9W1E.png",
            "author": "Graphbrain",
            "category": ["standalone"]
        },
        {
            "type": "education",
            "id": "nostarch-nlp-python",
            "title": "Natural Language Processing Using Python",
            "slogan": "No Starch Press, 2020",
            "description": "Natural Language Processing Using Python is an introduction to natural language processing (NLP), the task of converting human language into data that a computer can process. The book uses spaCy, a leading Python library for NLP, to guide readers through common NLP tasks related to generating and understanding human language with code. It addresses problems like understanding a user's intent, continuing a conversation with a human, and maintaining the state of a conversation.",
            "cover": "https://i.imgur.com/w0iycjl.jpg",
            "url": "https://nostarch.com/NLPPython",
            "author": "Yuli Vasiliev",
            "category": ["books"]
        },
        {
            "type": "education",
            "id": "oreilly-python-ds",
            "title": "Introduction to Machine Learning with Python: A Guide for Data Scientists",
            "slogan": "O'Reilly, 2016",
            "description": "Machine learning has become an integral part of many commercial applications and research projects, but this field is not exclusive to large companies with extensive research teams. If you use Python, even as a beginner, this book will teach you practical ways to build your own machine learning solutions. With all the data available today, machine learning applications are limited only by your imagination.",
            "cover": "https://covers.oreillystatic.com/images/0636920030515/lrg.jpg",
            "url": "http://shop.oreilly.com/product/0636920030515.do",
            "author": "Andreas Müller, Sarah Guido",
            "category": ["books"]
        },
        {
            "type": "education",
            "id": "text-analytics-python",
            "title": "Text Analytics with Python",
            "slogan": "Apress / Springer, 2016",
            "description": "*Text Analytics with Python* teaches you the techniques related to natural language processing and text analytics, and you will gain the skills to know which technique is best suited to solve a particular problem. You will look at each technique and algorithm with both a bird's eye view to understand how it can be used as well as with a microscopic view to understand the mathematical concepts and to implement them to solve your own problems.",
            "github": "dipanjanS/text-analytics-with-python",
            "cover": "https://i.imgur.com/AOmzZu8.png",
            "url": "https://www.amazon.com/Text-Analytics-Python-Real-World-Actionable/dp/148422387X",
            "author": "Dipanjan Sarkar",
            "category": ["books"]
        },
        {
            "type": "education",
            "id": "practical-ml-python",
            "title": "Practical Machine Learning with Python",
            "slogan": "Apress, 2017",
            "description": "Master the essential skills needed to recognize and solve complex problems with machine learning and deep learning. Using real-world examples that leverage the popular Python machine learning ecosystem, this book is your perfect companion for learning the art and science of machine learning to become a successful practitioner. The concepts, techniques, tools, frameworks, and methodologies used in this book will teach you how to think, design, build, and execute machine learning systems and projects successfully.",
            "github": "dipanjanS/practical-machine-learning-with-python",
            "cover": "https://i.imgur.com/5F4mkt7.jpg",
            "url": "https://www.amazon.com/Practical-Machine-Learning-Python-Problem-Solvers/dp/1484232062",
            "author": "Dipanjan Sarkar, Raghav Bali, Tushar Sharma",
            "category": ["books"]
        },
        {
            "type": "education",
            "id": "packt-nlp-computational-linguistics",
            "title": "Natural Language Processing and Computational Linguistics",
            "slogan": "Packt, 2018",
            "description": "This book shows you how to use natural language processing, and computational linguistics algorithms, to make inferences and gain insights about data you have. These algorithms are based on statistical machine learning and artificial intelligence techniques. The tools to work with these algorithms are available to you right now - with Python, and tools like Gensim and spaCy.",
            "cover": "https://i.imgur.com/aleMf1Y.jpg",
            "url": "https://www.amazon.com/Natural-Language-Processing-Computational-Linguistics-ebook/dp/B07BWH779J",
            "author": "Bhargav Srinivasa-Desikan",
            "category": ["books"]
        },
        {
            "type": "education",
            "id": "learning-path-spacy",
            "title": "Learning Path: Mastering spaCy for Natural Language Processing",
            "slogan": "O'Reilly, 2017",
            "description": "spaCy, a fast, user-friendly library for teaching computers to understand text, simplifies NLP techniques, such as speech tagging and syntactic dependencies, so you can easily extract information, attributes, and objects from massive amounts of text to then document, measure, and analyze. This Learning Path is a hands-on introduction to using spaCy to discover insights through natural language processing. While end-to-end natural language processing solutions can be complex, you’ll learn the linguistics, algorithms, and machine learning skills to get the job done.",
            "url": "https://www.safaribooksonline.com/library/view/learning-path-mastering/9781491986653/",
            "thumb": "https://i.imgur.com/9MIgMAc.jpg",
            "author": "Aaron Kramer",
            "category": ["courses"]
        },
        {
            "type": "education",
            "id": "spacy-course",
            "title": "Advanced NLP with spaCy",
            "slogan": "A free online course",
            "description": "In this free interactive course, you'll learn how to use spaCy to build advanced natural language understanding systems, using both rule-based and machine learning approaches.",
            "url": "https://course.spacy.io",
            "image": "https://i.imgur.com/JC00pHW.jpg",
            "thumb": "https://i.imgur.com/5RXLtrr.jpg",
            "author": "Ines Montani",
            "author_links": {
                "twitter": "_inesmontani",
                "github": "ines",
                "website": "https://ines.io"
            },
            "category": ["courses"]
        },
        {
            "type": "education",
            "id": "video-spacys-ner-model",
            "title": "spaCy's NER model",
            "slogan": "Incremental parsing with bloom embeddings and residual CNNs",
            "description": "spaCy v2.0's Named Entity Recognition system features a sophisticated word embedding strategy using subword features and \"Bloom\" embeddings, a deep convolutional neural network with residual connections, and a novel transition-based approach to named entity parsing. The system is designed to give a good balance of efficiency, accuracy and adaptability. In this talk, I sketch out the components of the system, explaining the intuition behind the various choices. I also give a brief introduction to the named entity recognition problem, with an overview of what else Explosion AI is working on, and why.",
            "youtube": "sqDHBH9IjRU",
            "author": "Matthew Honnibal",
            "author_links": {
                "twitter": "honnibal",
                "github": "honnibal",
                "website": "https://explosion.ai"
            },
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-new-nlp-solutions",
            "title": "Building new NLP solutions with spaCy and Prodigy",
            "slogan": "PyData Berlin 2018",
            "description": "In this talk, I will discuss how to address some of the most likely causes of failure for new Natural Language Processing (NLP) projects. My main recommendation is to take an iterative approach: don't assume you know what your pipeline should look like, let alone your annotation schemes or model architectures.",
            "author": "Matthew Honnibal",
            "author_links": {
                "twitter": "honnibal",
                "github": "honnibal",
                "website": "https://explosion.ai"
            },
            "youtube": "jpWqz85F_4Y",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-modern-nlp-in-python",
            "title": "Modern NLP in Python",
            "slogan": "PyData DC 2016",
            "description": "Academic and industry research in Natural Language Processing (NLP) has progressed at an accelerating pace over the last several years. Members of the Python community have been hard at work moving cutting-edge research out of papers and into open source, \"batteries included\" software libraries that can be applied to practical problems. We'll explore some of these tools for modern NLP in Python.",
            "author": "Patrick Harrison",
            "youtube": "6zm9NC9uRkk",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-spacy-course",
            "title": "Advanced NLP with spaCy · A free online course",
            "description": "spaCy is a modern Python library for industrial-strength Natural Language Processing. In this free and interactive online course, you'll learn how to use spaCy to build advanced natural language understanding systems, using both rule-based and machine learning approaches.",
            "url": "https://course.spacy.io/en",
            "author": "Ines Montani",
            "author_links": {
                "twitter": "_inesmontani",
                "github": "ines"
            },
            "youtube": "THduWAnG97k",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-spacy-course-de",
            "title": "Modernes NLP mit spaCy · Ein Gratis-Onlinekurs",
            "description": "spaCy ist eine moderne Python-Bibliothek für industriestarkes Natural Language Processing. In diesem kostenlosen und interaktiven Onlinekurs lernst du, mithilfe von spaCy fortgeschrittene Systeme für die Analyse natürlicher Sprache zu entwickeln und dabei sowohl regelbasierte Verfahren, als auch moderne Machine-Learning-Technologie einzusetzen.",
            "url": "https://course.spacy.io/de",
            "author": "Ines Montani",
            "author_links": {
                "twitter": "_inesmontani",
                "github": "ines"
            },
            "youtube": "K1elwpgDdls",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-spacy-course-es",
            "title": "NLP avanzado con spaCy · Un curso en línea gratis",
            "description": "spaCy es un paquete moderno de Python para hacer Procesamiento de Lenguaje Natural de potencia industrial. En este curso en línea, interactivo y gratuito, aprenderás a usar spaCy para construir sistemas avanzados de comprensión de lenguaje natural usando enfoques basados en reglas y en machine learning.",
            "url": "https://course.spacy.io/es",
            "author": "Camila Gutiérrez",
            "author_links": {
                "twitter": "Mariacamilagl30"
            },
            "youtube": "RNiLVCE5d4k",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-intro-to-nlp-episode-1",
            "title": "Intro to NLP with spaCy (1)",
            "slogan": "Episode 1: Data exploration",
            "description": "In this new video series, data science instructor Vincent Warmerdam gets started with spaCy, an open-source library for Natural Language Processing in Python. His mission: building a system to automatically detect programming languages in large volumes of text. Follow his process from the first idea to a prototype all the way to data collection and training a statistical named entity recogntion model from scratch.",
            "author": "Vincent Warmerdam",
            "author_links": {
                "twitter": "fishnets88",
                "github": "koaning"
            },
            "youtube": "WnGPv6HnBok",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-intro-to-nlp-episode-2",
            "title": "Intro to NLP with spaCy (2)",
            "slogan": "Episode 2: Rule-based Matching",
            "description": "In this new video series, data science instructor Vincent Warmerdam gets started with spaCy, an open-source library for Natural Language Processing in Python. His mission: building a system to automatically detect programming languages in large volumes of text. Follow his process from the first idea to a prototype all the way to data collection and training a statistical named entity recogntion model from scratch.",
            "author": "Vincent Warmerdam",
            "author_links": {
                "twitter": "fishnets88",
                "github": "koaning"
            },
            "youtube": "KL4-Mpgbahw",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-intro-to-nlp-episode-3",
            "title": "Intro to NLP with spaCy (3)",
            "slogan": "Episode 2: Evaluation",
            "description": "In this new video series, data science instructor Vincent Warmerdam gets started with spaCy, an open-source library for Natural Language Processing in Python. His mission: building a system to automatically detect programming languages in large volumes of text. Follow his process from the first idea to a prototype all the way to data collection and training a statistical named entity recogntion model from scratch.",
            "author": "Vincent Warmerdam",
            "author_links": {
                "twitter": "fishnets88",
                "github": "koaning"
            },
            "youtube": "4V0JDdohxAk",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-intro-to-nlp-episode-4",
            "title": "Intro to NLP with spaCy (4)",
            "slogan": "Episode 4: Named Entity Recognition",
            "description": "In this new video series, data science instructor Vincent Warmerdam gets started with spaCy, an open-source library for Natural Language Processing in Python. His mission: building a system to automatically detect programming languages in large volumes of text. Follow his process from the first idea to a prototype all the way to data collection and training a statistical named entity recogntion model from scratch.",
            "author": "Vincent Warmerdam",
            "author_links": {
                "twitter": "fishnets88",
                "github": "koaning"
            },
            "youtube": "IqOJU1-_Fi0",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-intro-to-nlp-episode-5",
            "title": "Intro to NLP with spaCy (5)",
            "slogan": "Episode 5: Rules vs. Machine Learning",
            "description": "In this new video series, data science instructor Vincent Warmerdam gets started with spaCy, an open-source library for Natural Language Processing in Python. His mission: building a system to automatically detect programming languages in large volumes of text. Follow his process from the first idea to a prototype all the way to data collection and training a statistical named entity recogntion model from scratch.",
            "author": "Vincent Warmerdam",
            "author_links": {
                "twitter": "fishnets88",
                "github": "koaning"
            },
            "youtube": "f4sqeLRzkPg",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-spacy-irl-entity-linking",
            "title": "Entity Linking functionality in spaCy",
            "slogan": "spaCy IRL 2019",
            "url": "https://www.youtube.com/playlist?list=PLBmcuObd5An4UC6jvK_-eSl6jCvP1gwXc",
            "author": "Sofie Van Landeghem",
            "author_links": {
                "twitter": "OxyKodit",
                "github": "svlandeg"
            },
            "youtube": "PW3RJM8tDGo",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-spacy-irl-lemmatization",
            "title": "Rethinking rule-based lemmatization",
            "slogan": "spaCy IRL 2019",
            "url": "https://www.youtube.com/playlist?list=PLBmcuObd5An4UC6jvK_-eSl6jCvP1gwXc",
            "author": "Guadalupe Romero",
            "author_links": {
                "twitter": "_guadiromero",
                "github": "guadi1994"
            },
            "youtube": "88zcQODyuko",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "video-spacy-irl-scispacy",
            "title": "ScispaCy: A spaCy pipeline & models for scientific & biomedical text",
            "slogan": "spaCy IRL 2019",
            "url": "https://www.youtube.com/playlist?list=PLBmcuObd5An4UC6jvK_-eSl6jCvP1gwXc",
            "author": "Mark Neumann",
            "author_links": {
                "twitter": "MarkNeumannnn",
                "github": "DeNeutoy"
            },
            "youtube": "2_HSKDALwuw",
            "category": ["videos"]
        },
        {
            "type": "education",
            "id": "podcast-nlp-highlights",
            "title": "NLP Highlights #78: Where do corpora come from?",
            "slogan": "January 2019",
            "description": "Most NLP projects rely crucially on the quality of annotations used for training and evaluating models. In this episode, Matt and Ines of Explosion AI tell us how Prodigy can improve data annotation and model development workflows. Prodigy is an annotation tool implemented as a python library, and it comes with a web application and a command line interface. A developer can define input data streams and design simple annotation interfaces. Prodigy can help break down complex annotation decisions into a series of binary decisions, and it provides easy integration with spaCy models. Developers can specify how models should be modified as new annotations come in in an active learning framework.",
            "soundcloud": "559200912",
            "thumb": "https://i.imgur.com/hOBQEzc.jpg",
            "url": "https://soundcloud.com/nlp-highlights/78-where-do-corpora-come-from-with-matt-honnibal-and-ines-montani",
            "author": "Matt Gardner, Waleed Ammar (Allen AI)",
            "author_links": {
                "website": "https://soundcloud.com/nlp-highlights"
            },
            "category": ["podcasts"]
        },
        {
            "type": "education",
            "id": "podcast-init",
            "title": "Podcast.__init__ #87: spaCy with Matthew Honnibal",
            "slogan": "December 2017",
            "description": "As the amount of text available on the internet and in businesses continues to increase, the need for fast and accurate language analysis becomes more prominent. This week Matthew Honnibal, the creator of SpaCy, talks about his experiences researching natural language processing and creating a library to make his findings accessible to industry.",
            "iframe": "https://www.pythonpodcast.com/wp-content/plugins/podlove-podcasting-plugin-for-wordpress/lib/modules/podlove_web_player/player_v4/dist/share.html?episode=https://www.pythonpodcast.com/?podlove_player4=176",
            "iframe_height": 200,
            "thumb": "https://i.imgur.com/rpo6BuY.png",
            "url": "https://www.podcastinit.com/episode-87-spacy-with-matthew-honnibal/",
            "author": "Tobias Macey",
            "author_links": {
                "website": "https://www.podcastinit.com"
            },
            "category": ["podcasts"]
        },
        {
            "type": "education",
            "id": "podcast-init2",
            "title": "Podcast.__init__ #256: An Open Source Toolchain For NLP From Explosion AI",
            "slogan": "March 2020",
            "description": "The state of the art in natural language processing is a constantly moving target. With the rise of deep learning, previously cutting edge techniques have given way to robust language models. Through it all the team at Explosion AI have built a strong presence with the trifecta of SpaCy, Thinc, and Prodigy to support fast and flexible data labeling to feed deep learning models and performant and scalable text processing. In this episode founder and open source author Matthew Honnibal shares his experience growing a business around cutting edge open source libraries for the machine learning developent process.",
            "iframe": "https://cdn.podlove.org/web-player/share.html?episode=https%3A%2F%2Fwww.pythonpodcast.com%2F%3Fpodlove_player4%3D614",
            "iframe_height": 200,
            "thumb": "https://i.imgur.com/rpo6BuY.png",
            "url": "https://www.pythonpodcast.com/explosion-ai-natural-language-processing-episode-256/",
            "author": "Tobias Macey",
            "author_links": {
                "website": "https://www.podcastinit.com"
            },
            "category": ["podcasts"]
        },
        {
            "type": "education",
            "id": "talk-python-podcast",
            "title": "Talk Python #202: Building a software business",
            "slogan": "March 2019",
            "description": "One core question around open source is how do you fund it? Well, there is always that PayPal donate button. But that's been a tremendous failure for many projects. Often the go-to answer is consulting. But what if you don't want to trade time for money? You could take things up a notch and change the equation, exchanging value for money. That's what Ines Montani and her co-founder did when they started Explosion AI with spaCy as the foundation.",
            "thumb": "https://i.imgur.com/q1twuK8.png",
            "url": "https://talkpython.fm/episodes/show/202/building-a-software-business",
            "soundcloud": "588364857",
            "author": "Michael Kennedy",
            "author_links": {
                "website": "https://talkpython.fm/"
            },
            "category": ["podcasts"]
        },
        {
            "type": "education",
            "id": "twimlai-podcast",
            "title": "TWiML & AI: Practical NLP with spaCy and Prodigy",
            "slogan": "May 2019",
            "description": "\"Ines and I caught up to discuss her various projects, including the aforementioned SpaCy, an open-source NLP library built with a focus on industry and production use cases. In our conversation, Ines gives us an overview of the SpaCy Library, a look at some of the use cases that excite her, and the Spacy community and contributors. We also discuss her work with Prodigy, an annotation service tool that uses continuous active learning to train models, and finally, what other exciting projects she is working on.\"",
            "thumb": "https://i.imgur.com/ng2F5gK.png",
            "url": "https://twimlai.com/twiml-talk-262-practical-natural-language-processing-with-spacy-and-prodigy-w-ines-montani",
            "iframe": "https://html5-player.libsyn.com/embed/episode/id/9691514/height/90/theme/custom/thumbnail/no/preload/no/direction/backward/render-playlist/no/custom-color/3e85b1/",
            "iframe_height": 90,
            "author": "Sam Charrington",
            "author_links": {
                "website": "https://twimlai.com"
            },
            "category": ["podcasts"]
        },
        {
            "type": "education",
            "id": "analytics-vidhya",
            "title": "DataHack Radio #23: The Brains behind spaCy",
            "slogan": "June 2019",
            "description": "\"What would you do if you had the chance to pick the brains behind one of the most popular Natural Language Processing (NLP) libraries of our era? A library that has helped usher in the current boom in NLP applications and nurtured tons of NLP scientists? Well – you invite the creators on our popular DataHack Radio podcast and let them do the talking! We are delighted to welcome Ines Montani and Matt Honnibal, the developers of spaCy – a powerful and advanced library for NLP.\"",
            "thumb": "https://i.imgur.com/3zJKZ1P.jpg",
            "url": "https://www.analyticsvidhya.com/blog/2019/06/datahack-radio-ines-montani-matthew-honnibal-brains-behind-spacy/",
            "soundcloud": "630741825",
            "author": "Analytics Vidhya",
            "author_links": {
                "website": "https://www.analyticsvidhya.com",
                "twitter": "analyticsvidhya"
            },
            "category": ["podcasts"]
        },
        {
            "type": "education",
            "id": "practical-ai-podcast",
            "title": "Practical AI: Modern NLP with spaCy",
            "slogan": "December 2019",
            "description": "\"SpaCy is awesome for NLP! It’s easy to use, has widespread adoption, is open source, and integrates the latest language models. Ines Montani and Matthew Honnibal (core developers of spaCy and co-founders of Explosion) join us to discuss the history of the project, its capabilities, and the latest trends in NLP. We also dig into the practicalities of taking NLP workflows to production. You don’t want to miss this episode!\"",
            "thumb": "https://i.imgur.com/jn8Bcdw.png",
            "url": "https://changelog.com/practicalai/68",
            "author": "Daniel Whitenack & Chris Benson",
            "author_links": {
                "website": "https://changelog.com/practicalai",
                "twitter": "https://twitter.com/PracticalAIFM"
            },
            "category": ["podcasts"]
        },
        {
            "type": "education",
            "id": "video-entity-linking",
            "title": "Training a custom entity linking mode with spaCy",
            "author": "Sofie Van Landeghem",
            "author_links": {
                "twitter": "OxyKodit",
                "github": "svlandeg"
            },
            "youtube": "8u57WSXVpmw",
            "category": ["videos"]
        },
        {
            "id": "adam_qas",
            "title": "ADAM: Question Answering System",
            "slogan": "A question answering system that extracts answers from Wikipedia to questions posed in natural language.",
            "github": "5hirish/adam_qas",
            "pip": "qas",
            "code_example": [
                "git clone https://github.com/5hirish/adam_qas.git",
                "cd adam_qas",
                "pip install -r requirements.txt",
                "python -m qas.adam 'When was linux kernel version 4.0 released ?'"
            ],
            "code_language": "bash",
            "thumb": "https://shirishkadam.files.wordpress.com/2018/04/mini_alleviate.png",
            "author": "Shirish Kadam",
            "author_links": {
                "twitter": "5hirish",
                "github": "5hirish",
                "website": "https://shirishkadam.com/"
            },
            "category": ["standalone"],
            "tags": ["question-answering", "elasticsearch"]
        },
        {
            "id": "epitator",
            "title": "EpiTator",
            "thumb": "https://i.imgur.com/NYFY1Km.jpg",
            "slogan": "Extracts case counts, resolved location/species/disease names, date ranges and more",
            "description": "EcoHealth Alliance uses EpiTator to catalog the what, where and when of infectious disease case counts reported in online news. Each of these aspects is extracted using independent annotators than can be applied to other domains. EpiTator organizes annotations by creating \"AnnoTiers\" for each type. AnnoTiers have methods for manipulating, combining and searching annotations. For instance, the `with_following_spans_from()` method can be used to create a new tier that combines a tier of one type (such as numbers), with another (say, kitchenware). The resulting tier will contain all the phrases in the document that match that pattern, like \"5 plates\" or \"2 cups.\"\n\nAnother commonly used method is `group_spans_by_containing_span()` which can be used to do things like find all the spaCy tokens in all the GeoNames a document mentions. spaCy tokens, named entities, sentences and noun chunks are exposed through the spaCy annotator which will create a AnnoTier for each. These are basis of many of the other annotators. EpiTator also includes an annotator for extracting tables embedded in free text articles. Another neat feature is that the lexicons used for entity resolution are all stored in an embedded sqlite database so there is no need to run any external services in order to use EpiTator.",
            "url": "https://github.com/ecohealthalliance/EpiTator",
            "github": "ecohealthalliance/EpiTator",
            "pip": "EpiTator",
            "code_example": [
                "from epitator.annotator import AnnoDoc",
                "from epitator.geoname_annotator import GeonameAnnotator",
                "",
                "doc = AnnoDoc('Where is Chiang Mai?')",
                "geoname_annotier = doc.require_tiers('geonames', via=GeonameAnnotator)",
                "geoname = geoname_annotier.spans[0].metadata['geoname']",
                "geoname['name']",
                "# = 'Chiang Mai'",
                "geoname['geonameid']",
                "# = '1153671'",
                "geoname['latitude']",
                "# = 18.79038",
                "geoname['longitude']",
                "# = 98.98468",
                "",
                "from epitator.spacy_annotator import SpacyAnnotator",
                "spacy_token_tier = doc.require_tiers('spacy.tokens', via=SpacyAnnotator)",
                "list(geoname_annotier.group_spans_by_containing_span(spacy_token_tier))",
                "# = [(AnnoSpan(9-19, Chiang Mai), [AnnoSpan(9-15, Chiang), AnnoSpan(16-19, Mai)])]"
            ],
            "author": "EcoHealth Alliance",
            "author_links": {
                "github": "ecohealthalliance",
                "website": " https://ecohealthalliance.org/"
            },
            "category": ["scientific", "standalone"]
        },
        {
            "id": "self-attentive-parser",
            "title": "Berkeley Neural Parser",
            "slogan": "Constituency Parsing with a Self-Attentive Encoder (ACL 2018)",
            "description": "A Python implementation of the parsers described in *\"Constituency Parsing with a Self-Attentive Encoder\"* from ACL 2018.",
            "url": "https://arxiv.org/abs/1805.01052",
            "github": "nikitakit/self-attentive-parser",
            "pip": "benepar",
            "code_example": [
                "import spacy",
                "from benepar.spacy_plugin import BeneparComponent",
                "",
                "nlp = spacy.load('en')",
                "nlp.add_pipe(BeneparComponent('benepar_en'))",
                "doc = nlp('The time for action is now. It's never too late to do something.')",
                "sent = list(doc.sents)[0]",
                "print(sent._.parse_string)",
                "# (S (NP (NP (DT The) (NN time)) (PP (IN for) (NP (NN action)))) (VP (VBZ is) (ADVP (RB now))) (. .))",
                "print(sent._.labels)",
                "# ('S',)",
                "print(list(sent._.children)[0])",
                "# The time for action"
            ],
            "author": "Nikita Kitaev",
            "author_links": {
                "github": "nikitakit",
                "website": " http://kitaev.io"
            },
            "category": ["research", "pipeline"]
        },
        {
            "id": "excelcy",
            "title": "ExcelCy",
            "slogan": "Excel Integration with spaCy. Training NER using XLSX from PDF, DOCX, PPT, PNG or JPG.",
            "description": "ExcelCy is a toolkit to integrate Excel to spaCy NLP training experiences. Training NER using XLSX from PDF, DOCX, PPT, PNG or JPG. ExcelCy has pipeline to match Entity with PhraseMatcher or Matcher in regular expression.",
            "url": "https://github.com/kororo/excelcy",
            "github": "kororo/excelcy",
            "pip": "excelcy",
            "code_example": [
                "from excelcy import ExcelCy",
                "# collect sentences, annotate Entities and train NER using spaCy",
                "excelcy = ExcelCy.execute(file_path='https://github.com/kororo/excelcy/raw/master/tests/data/test_data_01.xlsx')",
                "# use the nlp object as per spaCy API",
                "doc = excelcy.nlp('Google rebrands its business apps')",
                "# or save it for faster bootstrap for application",
                "excelcy.nlp.to_disk('/model')"
            ],
            "author": "Robertus Johansyah",
            "author_links": {
                "github": "kororo"
            },
            "category": ["training"],
            "tags": ["excel"]
        },
        {
            "id": "spacy-graphql",
            "title": "spacy-graphql",
            "slogan": "Query spaCy's linguistic annotations using GraphQL",
            "github": "ines/spacy-graphql",
            "description": "A very simple and experimental app that lets you query spaCy's linguistic annotations using [GraphQL](https://graphql.org/). The API currently supports most token attributes, named entities, sentences and text categories (if available as `doc.cats`, i.e. if you added a text classifier to a model). The `meta` field will return the model meta data. Models are only loaded once and kept in memory.",
            "url": "https://explosion.ai/demos/spacy-graphql",
            "category": ["apis"],
            "tags": ["graphql"],
            "thumb": "https://i.imgur.com/xC7zpTO.png",
            "code_example": [
                "{",
                "  nlp(text: \"Zuckerberg is the CEO of Facebook.\", model: \"en_core_web_sm\") {",
                "    meta {",
                "      lang",
                "      description",
                "    }",
                "    doc {",
                "      text",
                "      tokens {",
                "        text",
                "        pos_",
                "      }",
                "      ents {",
                "        text",
                "        label_",
                "      }",
                "    }",
                "  }",
                "}"
            ],
            "code_language": "json",
            "author": "Ines Montani",
            "author_links": {
                "twitter": "_inesmontani",
                "github": "ines",
                "website": "https://ines.io"
            }
        },
        {
            "id": "spacy-js",
            "title": "spacy-js",
            "slogan": "JavaScript API for spaCy with Python REST API",
            "github": "ines/spacy-js",
            "description": "JavaScript interface for accessing linguistic annotations provided by spaCy. This project is mostly experimental and was developed for fun to play around with different ways of mimicking spaCy's Python API.\n\nThe results will still be computed in Python and made available via a REST API. The JavaScript API resembles spaCy's Python API as closely as possible (with a few exceptions, as the values are all pre-computed and it's tricky to express complex recursive relationships).",
            "code_language": "javascript",
            "code_example": [
                "const spacy = require('spacy');",
                "",
                "(async function() {",
                "    const nlp = spacy.load('en_core_web_sm');",
                "    const doc = await nlp('This is a text about Facebook.');",
                "    for (let ent of doc.ents) {",
                "        console.log(ent.text, ent.label);",
                "    }",
                "    for (let token of doc) {",
                "        console.log(token.text, token.pos, token.head.text);",
                "    }",
                "})();"
            ],
            "author": "Ines Montani",
            "author_links": {
                "twitter": "_inesmontani",
                "github": "ines",
                "website": "https://ines.io"
            },
            "category": ["nonpython"],
            "tags": ["javascript"]
        },
        {
            "id": "spacy-raspberry",
            "title": "spacy-raspberry",
            "slogan": "64bit Raspberry Pi image for spaCy and neuralcoref",
            "github": "boehm-e/spacy-raspberry",
            "thumb": "https://i.imgur.com/VCJMrE6.png",
            "image": "https://raw.githubusercontent.com/boehm-e/spacy-raspberry/master/imgs/preview.png",
            "author": "Erwan Boehm",
            "author_links": {
                "github": "boehm-e"
            },
            "category": ["apis"],
            "tags": ["raspberrypi"]
        },
        {
            "id": "spacy-wordnet",
            "title": "spacy-wordnet",
            "slogan": "WordNet meets spaCy",
            "description": "`spacy-wordnet` creates annotations that easily allow the use of WordNet and [WordNet Domains](http://wndomains.fbk.eu/) by using the [NLTK WordNet interface](http://www.nltk.org/howto/wordnet.html)",
            "github": "recognai/spacy-wordnet",
            "tags": ["wordnet", "synsets"],
            "thumb": "https://i.imgur.com/3y2uPUv.jpg",
            "code_example": [
                "import spacy",
                "from spacy_wordnet.wordnet_annotator import WordnetAnnotator ",
                "",
                "# Load an spacy model (supported models are \"es\" and \"en\") ",
                "nlp = spacy.load('en')",
                "nlp.add_pipe(WordnetAnnotator(nlp.lang), after='tagger')",
                "token = nlp('prices')[0]",
                "",
                "# wordnet object link spacy token with nltk wordnet interface by giving acces to",
                "# synsets and lemmas ",
                "token._.wordnet.synsets()",
                "token._.wordnet.lemmas()",
                "",
                "# And automatically tags with wordnet domains",
                "token._.wordnet.wordnet_domains()"
            ],
            "author": "recognai",
            "author_links": {
                "github": "recognai",
                "twitter": "recogn_ai",
                "website": "https://recogn.ai"
            },
            "category": ["pipeline"]
        },
        {
            "id": "spacy-conll",
            "title": "spacy_conll",
            "slogan": "Parse text with spaCy and gets its output in CoNLL-U format",
            "description": "This module allows you to parse a text to CoNLL-U format. It contains a pipeline component for spaCy that adds CoNLL-U properties to a Doc and its sentences. It can also be used as a command-line tool.",
            "code_example": [
                "import spacy",
                "from spacy_conll import ConllFormatter",
                "",
                "nlp = spacy.load('en')",
                "conllformatter = ConllFormatter(nlp)",
                "nlp.add_pipe(conllformatter, after='parser')",
                "doc = nlp('I like cookies. Do you?')",
                "conll = doc._.conll",
                "print(doc._.conll_str_headers)",
                "print(doc._.conll_str)"
            ],
            "code_language": "python",
            "author": "Bram Vanroy",
            "author_links": {
                "github": "BramVanroy",
                "twitter": "BramVanroy",
                "website": "https://bramvanroy.be"
            },
            "github": "BramVanroy/spacy_conll",
            "category": ["standalone", "pipeline"],
            "tags": ["linguistics", "computational linguistics", "conll"]
        },
        {
            "id": "spacy-langdetect",
            "title": "spacy-langdetect",
            "slogan": "A fully customizable language detection pipeline for spaCy",
            "description": "This module allows you to add language detection capabilites to your spaCy pipeline. Also supports custom language detectors!",
            "pip": "spacy-langdetect",
            "code_example": [
                "import spacy",
                "from spacy_langdetect import LanguageDetector",
                "nlp = spacy.load('en')",
                "nlp.add_pipe(LanguageDetector(), name='language_detector', last=True)",
                "text = 'This is an english text.'",
                "doc = nlp(text)",
                "# document level language detection. Think of it like average language of the document!",
                "print(doc._.language)",
                "# sentence level language detection",
                "for sent in doc.sents:",
                "   print(sent, sent._.language)"
            ],
            "code_language": "python",
            "author": "Abhijit Balaji",
            "author_links": {
                "github": "Abhijit-2592",
                "website": "https://abhijit-2592.github.io/"
            },
            "github": "Abhijit-2592/spacy-langdetect",
            "category": ["pipeline"],
            "tags": ["language-detection"]
        },
        {
            "id": "ludwig",
            "title": "Ludwig",
            "slogan": "A code-free deep learning toolbox",
            "description": "Ludwig makes it easy to build deep learning models for many applications, including NLP ones. It uses spaCy for tokenizing text in different languages.",
            "pip": "ludwig",
            "github": "uber/ludwig",
            "thumb": "https://i.imgur.com/j1sORgD.png",
            "url": "http://ludwig.ai",
            "author": "Piero Molino @ Uber AI",
            "author_links": {
                "github": "w4nderlust",
                "twitter": "w4nderlus7",
                "website": "http://w4nderlu.st"
            },
            "category": ["standalone", "research"]
        },
        {
            "id": "pic2phrase_bot",
            "title": "pic2phrase_bot: Photo Description Generator",
            "slogan": "A bot that generates descriptions to submitted photos, in a human-like manner.",
            "description": "pic2phrase_bot runs inside Telegram messenger and can be used to generate a phrase describing a submitted photo, employing computer vision, web scraping, and syntactic dependency analysis powered by spaCy.",
            "thumb": "https://i.imgur.com/ggVI02O.jpg",
            "image": "https://i.imgur.com/z1yhWQR.jpg",
            "url": "https://telegram.me/pic2phrase_bot",
            "author": "Yuli Vasiliev",
            "author_links": {
                "twitter": "VasilievYuli"
            },
            "category": ["standalone", "conversational"]
        },
        {
            "id": "gracyql",
            "title": "gracyql",
            "slogan": "A thin GraphQL wrapper around spacy",
            "github": "oterrier/gracyql",
            "description": "An example of a basic [Starlette](https://github.com/encode/starlette) app using [Spacy](https://github.com/explosion/spaCy) and [Graphene](https://github.com/graphql-python/graphene). The main goal is to be able to use the amazing power of spaCy from other languages and retrieving only the information you need thanks to the GraphQL query definition. The GraphQL schema tries to mimic as much as possible the original Spacy API with classes Doc, Span and Token.",
            "thumb": "https://i.imgur.com/xC7zpTO.png",
            "category": ["apis"],
            "tags": ["graphql"],
            "code_example": [
                "query ParserDisabledQuery {",
                "  nlp(model: \"en\", disable: [\"parser\", \"ner\"]) {",
                "    doc(text: \"I live in Grenoble, France\") {",
                "      text",
                "      tokens {",
                "        id",
                "        pos",
                "        lemma",
                "        dep",
                "      }",
                "      ents {",
                "        start",
                "        end",
                "        label",
                "      }",
                "    }",
                "  }",
                "}"
            ],
            "code_language": "json",
            "author": "Olivier Terrier",
            "author_links": {
                "github": "oterrier"
            }
        },
        {
            "id": "pyInflect",
            "slogan": "A Python module for word inflections",
            "description": "This package uses the [spaCy 2.0 extensions](https://spacy.io/usage/processing-pipelines#extensions) to add word inflections to the system.",
            "github": "bjascob/pyInflect",
            "pip": "pyinflect",
            "code_example": [
                "import spacy",
                "import pyinflect",
                "",
                "nlp = spacy.load('en_core_web_sm')",
                "doc = nlp('This is an example.')",
                "doc[3].tag_                # NN",
                "doc[3]._.inflect('NNS')    # examples"
            ],
            "author": "Brad Jascob",
            "author_links": {
                "github": "bjascob"
            },
            "category": ["pipeline"],
            "tags": ["inflection"]
        },
        {
            "id": "lemminflect",
            "slogan": "A Python module for English lemmatization and inflection",
            "description": "LemmInflect uses a dictionary approach to lemmatize English words and inflect them into forms specified by a user supplied [Universal Dependencies](https://universaldependencies.org/u/pos/) or [Penn Treebank](https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html) tag.  The library works with out-of-vocabulary (OOV) words by applying neural network techniques to classify word forms and choose the appropriate morphing rules. The system acts as a standalone module or as an extension to spaCy.",
            "github": "bjascob/LemmInflect",
            "pip": "lemminflect",
            "thumb": "https://raw.githubusercontent.com/bjascob/LemmInflect/master/docs/img/icons8-citrus-80.png",
            "code_example": [
                "import spacy",
                "import lemminflect",
                "",
                "nlp = spacy.load('en_core_web_sm')",
                "doc = nlp('I am testing this example.')",
                "doc[2]._.lemma()         # 'test'",
                "doc[4]._.inflect('NNS')  # 'examples'"
            ],
            "author": "Brad Jascob",
            "author_links": {
                "github": "bjascob"
            },
            "category": ["pipeline"],
            "tags": ["inflection", "lemmatizer"]
        },
        {
            "id": "blackstone",
            "title": "Blackstone",
            "slogan": "A spaCy pipeline and model for NLP on unstructured legal text",
            "description": "Blackstone is a spaCy model and library for processing long-form, unstructured legal text. Blackstone is an experimental research project from the [Incorporated Council of Law Reporting for England and Wales'](https://iclr.co.uk/) research lab, [ICLR&D](https://research.iclr.co.uk/).",
            "github": "ICLRandD/Blackstone",
            "pip": "blackstone",
            "thumb": "https://iclr.s3-eu-west-1.amazonaws.com/assets/iclrand/Blackstone/thumb.png",
            "url": "https://research.iclr.co.uk",
            "author": " ICLR&D",
            "author_links": {
                "github": "ICLRandD",
                "twitter": "ICLRanD",
                "website": "https://research.iclr.co.uk"
            },
            "category": ["scientific", "models", "research"]
        },
        {
            "id": "NGym",
            "title": "NeuralGym",
            "slogan": "A little Windows GUI for training models with spaCy",
            "description": "NeuralGym is a Python application for Windows with a graphical user interface to train models with spaCy. Run the application, select an output folder, a training data file in spaCy's data format, a spaCy model or blank model and press 'Start'.",
            "github": "d5555/NeuralGym",
            "url": "https://github.com/d5555/NeuralGym",
            "image": "https://github.com/d5555/NeuralGym/raw/master/NGym.png",
            "thumb": "https://github.com/d5555/NeuralGym/raw/master/NGym/web.png",
            "author": "d5555",
            "category": ["training"],
            "tags": ["windows"]
        },
        {
            "id": "holmes",
            "title": "Holmes",
            "slogan": "Information extraction from English and German texts based on predicate logic",
            "github": "msg-systems/holmes-extractor",
            "url": "https://github.com/msg-systems/holmes-extractor",
            "description": "Holmes is a Python 3 library that supports a number of use cases involving information extraction from English and German texts, including chatbot, structural extraction, topic matching and supervised document classification. There is a [website demonstrating intelligent search based on topic matching](https://holmes-demo.xt.msg.team).",
            "pip": "holmes-extractor",
            "category": ["conversational", "standalone"],
            "tags": ["chatbots", "text-processing"],
            "thumb": "https://raw.githubusercontent.com/msg-systems/holmes-extractor/master/docs/holmes_thumbnail.png",
            "code_example": [
                "import holmes_extractor as holmes",
                "holmes_manager = holmes.Manager(model='en_core_web_lg')",
                "holmes_manager.register_search_phrase('A big dog chases a cat')",
                "holmes_manager.start_chatbot_mode_console()"
            ],
            "author": "Richard Paul Hudson",
            "author_links": {
                "github": "richardpaulhudson"
            }
        },
        {
            "id": "spacy-transformers",
            "title": "spacy-transformers",
            "slogan": "spaCy pipelines for pretrained BERT, XLNet and GPT-2",
            "description": "This package provides spaCy model pipelines that wrap [Hugging Face's `transformers`](https://github.com/huggingface/transformers) package, so you can use them in spaCy. The result is convenient access to state-of-the-art transformer architectures, such as BERT, GPT-2, XLNet, etc.",
            "github": "explosion/spacy-transformers",
            "url": "https://explosion.ai/blog/spacy-transformers",
            "pip": "spacy-transformers",
            "category": ["pipeline", "models", "research"],
            "code_example": [
                "import spacy",
                "",
                "nlp = spacy.load(\"en_trf_bertbaseuncased_lg\")",
                "doc = nlp(\"Apple shares rose on the news. Apple pie is delicious.\")",
                "print(doc[0].similarity(doc[7]))",
                "print(doc._.trf_last_hidden_state.shape)"
            ],
            "author": "Explosion",
            "author_links": {
                "twitter": "explosion_ai",
                "github": "explosion",
                "website": "https://explosion.ai"
            }
        },
        {
            "id": "negspacy",
            "title": "negspaCy",
            "slogan": "spaCy pipeline object for negating concepts in text based on the NegEx algorithm.",
            "github": "jenojp/negspacy",
            "url": "https://github.com/jenojp/negspacy",
            "description": "negspacy is a spaCy pipeline component that evaluates whether Named Entities are negated in text. It adds an extension to 'Span' objects.",
            "pip": "negspacy",
            "category": ["pipeline", "scientific"],
            "tags": ["negation", "text-processing"],
            "thumb": "https://github.com/jenojp/negspacy/blob/master/docs/thumb.png?raw=true",
            "image": "https://github.com/jenojp/negspacy/blob/master/docs/icon.png?raw=true",
            "code_example": [
                "import spacy",
                "from negspacy.negation import Negex",
                "",
                "nlp = spacy.load(\"en_core_web_sm\")",
                "negex = Negex(nlp, ent_types=[\"PERSON','ORG\"])",
                "nlp.add_pipe(negex, last=True)",
                "",
                "doc = nlp(\"She does not like Steve Jobs but likes Apple products.\")",
                "for e in doc.ents:",
                "    print(e.text, e._.negex)"
            ],
            "author": "Jeno Pizarro",
            "author_links": {
                "github": "jenojp",
                "twitter": "jenojp"
            }
        },
        {
            "id": "ronec",
            "title": "RONEC - Romanian Named Entity Corpus",
            "slogan": "Named Entity Recognition corpus for Romanian language.",
            "github": "dumitrescustefan/ronec",
            "url": "https://github.com/dumitrescustefan/ronec",
            "description": "The corpus holds 5127 sentences, annotated with 16 classes, with a total of 26376 annotated entities. The corpus comes into two formats: BRAT and CONLLUP.",
            "category": ["standalone", "models"],
            "tags": ["ner", "romanian"],
            "thumb": "https://raw.githubusercontent.com/dumitrescustefan/ronec/master/res/thumb.png",
            "code_example": [
                "# to train a new model on ronec",
                "python3 convert_spacy.py ronec/conllup/ronec.conllup output",
                "python3 -m spacy train ro models output/train_ronec.json output/train_ronec.json -p ent",
                "",
                "# download the Romanian NER model",
                "python -m spacy download ro_ner",
                "",
                "# load the model and print entities for a simple sentence",
                "import spacy",
                "",
                "nlp = spacy.load(\"ro_ner\")",
                "doc = nlp(\"Popescu Ion a fost la Cluj\")",
                "",
                "for ent in doc.ents:",
                "\tprint(ent.text, ent.start_char, ent.end_char, ent.label_)"
            ],
            "author": "Stefan Daniel Dumitrescu, Andrei-Marius Avram"
        },
        {
            "id": "num_fh",
            "title": "Numeric Fused-Head",
            "slogan": "Numeric Fused-Head Identificaiton and Resolution in English",
            "description": "This package provide a wrapper for the Numeric Fused-Head in English. It provides another information layer on numbers that refer to another entity which is not obvious from the syntactic tree.",
            "github": "yanaiela/num_fh",
            "pip": "num_fh",
            "category": ["pipeline", "research"],
            "code_example": [
                "import spacy",
                "from num_fh import NFH",
                "nlp = spacy.load('en_core_web_sm')",
                "nfh = NFH(nlp)",
                "nlp.add_pipe(nfh, first=False)",
                "doc = nlp(\"I told you two, that only one of them is the one who will get 2 or 3 icecreams\")",
                "",
                "assert doc[16]._.is_nfh == True",
                "assert doc[18]._.is_nfh == False",
                "assert doc[3]._.is_deter_nfh == True",
                "assert doc[16]._.is_deter_nfh == False",
                "assert len(doc._.nfh) == 4"
            ],
            "author": "Yanai Elazar",
            "author_links": {
                "github": "yanaiela",
                "twitter": "yanaiela",
                "website": "https://yanaiela.github.io"
            }
        },
        {
            "id": "presidio",
            "title": "Presidio",
            "slogan": "Context aware, pluggable and customizable data protection and PII data anonymization",
            "description": "Presidio *(Origin from Latin praesidium ‘protection, garrison’)* helps to ensure sensitive text is properly managed and governed. It provides fast ***analytics*** and ***anonymization*** for sensitive text such as credit card numbers, names, locations, social security numbers, bitcoin wallets, US phone numbers and financial data. Presidio analyzes the text using predefined or custom recognizers to identify entities, patterns, formats, and checksums with relevant context.",
            "url": "https://aka.ms/presidio",
            "image": "https://raw.githubusercontent.com/microsoft/presidio/master/docs/assets/before-after.png",
            "github": "microsoft/presidio",
            "category": ["standalone"],
            "thumb": "https://avatars0.githubusercontent.com/u/6154722",
            "author": "Microsoft",
            "author_links": {
                "github": "microsoft"
            }
        },
        {
            "id": "presidio-research",
            "title": "Presidio Research",
            "slogan": "Toolbox for developing and evaluating PII detectors, NER models for PII and generating fake PII data",
            "description": "This package features data-science related tasks for developing new recognizers for Microsoft Presidio. It is used for the evaluation of the entire system, as well as for evaluating specific PII recognizers or PII detection models. Anyone interested in evaluating an existing Microsoft Presidio instance, a specific PII recognizer or to develop new models or logic for detecting PII could leverage the preexisting work in this package. Additionally, anyone interested in generating new data based on previous datasets (e.g. to increase the coverage of entity values) for Named Entity Recognition models could leverage the data generator contained in this package.",
            "url": "https://aka.ms/presidio-research",
            "github": "microsoft/presidio-research",
            "category": ["standalone"],
            "thumb": "https://avatars0.githubusercontent.com/u/6154722",
            "author": "Microsoft",
            "author_links": {
                "github": "microsoft"
            }
        },
        {
            "id": "python-sentence-boundary-disambiguation",
            "title": "pySBD - python Sentence Boundary Disambiguation",
            "slogan": "Rule-based sentence boundary detection that works out-of-the-box",
            "github": "nipunsadvilkar/pySBD",
            "description": "pySBD is 'real-world' sentence segmenter which extracts reasonable sentences when the format and domain of the input text are unknown. It is a rules-based algorithm based on [The Golden Rules](https://s3.amazonaws.com/tm-town-nlp-resources/golden_rules.txt) - a set of tests to check accuracy of segmenter in regards to edge case scenarios developed by [TM-Town](https://www.tm-town.com/) dev team. pySBD is python port of ruby gem [Pragmatic Segmenter](https://github.com/diasks2/pragmatic_segmenter).",
            "pip": "pysbd",
            "category": ["scientific"],
            "tags": ["sentence segmentation"],
            "code_example": [
                "from pysbd.util import PySBDFactory",
                "",
                "nlp = spacy.blank('en')",
                "nlp.add_pipe(PySBDFactory(nlp))",
                "",
                "doc = nlp('My name is Jonas E. Smith. Please turn to p. 55.')",
                "print(list(doc.sents))",
                "# [My name is Jonas E. Smith., Please turn to p. 55.]"
            ],
            "author": "Nipun Sadvilkar",
            "author_links": {
                "twitter": "nipunsadvilkar",
                "github": "nipunsadvilkar",
                "website": "https://nipunsadvilkar.github.io"
            }
        },
        {
            "id": "cookiecutter-spacy-fastapi",
            "title": "cookiecutter-spacy-fastapi",
            "slogan": "Docker-based cookiecutter for easy spaCy APIs using FastAPI",
            "description": "Docker-based cookiecutter for easy spaCy APIs using FastAPI. The default endpoints expect batch requests with a list of Records in the Azure Search Cognitive Skill format. So out of the box, this cookiecutter can be setup as a Custom Cognitive Skill. For more on Azure Search and Cognitive Skills [see this page](https://docs.microsoft.com/en-us/azure/search/cognitive-search-custom-skill-interface).",
            "url": "https://github.com/microsoft/cookiecutter-spacy-fastapi",
            "image": "https://raw.githubusercontent.com/microsoft/cookiecutter-spacy-fastapi/master/images/cookiecutter-docs.png",
            "github": "microsoft/cookiecutter-spacy-fastapi",
            "category": ["apis"],
            "thumb": "https://avatars0.githubusercontent.com/u/6154722",
            "author": "Microsoft",
            "author_links": {
                "github": "microsoft"
            }
        },
        {
            "id": "dframcy",
            "title": "Dframcy",
            "slogan": "Dataframe Integration with spaCy NLP",
            "github": "yash1994/dframcy",
            "description": "DframCy is a light-weight utility module to integrate Pandas Dataframe to spaCy's linguistic annotation and training tasks.",
            "pip": "dframcy",
            "category": ["pipeline", "training"],
            "tags": ["pandas"],
            "code_example": [
                "import spacy",
                "from dframcy import DframCy",
                "",
                "nlp = spacy.load('en_core_web_sm')",
                "dframcy = DframCy(nlp)",
                "doc = dframcy.nlp(u'Apple is looking at buying U.K. startup for $1 billion')",
                "annotation_dataframe = dframcy.to_dataframe(doc)"
            ],
            "author": "Yash Patadia",
            "author_links": {
                "twitter": "PatadiaYash",
                "github": "yash1994"
            }
        },
        {
            "id": "spacy-pytextrank",
            "title": "PyTextRank",
            "slogan": "Py impl of TextRank for lightweight phrase extraction",
            "description": "An implementation of TextRank in Python for use in spaCy pipelines which provides fast, effective phrase extraction from texts, along with extractive summarization. The graph algorithm works independent of a specific natural language and does not require domain knowledge. See (Mihalcea 2004) https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf",
            "github": "DerwenAI/pytextrank",
            "pip": "pytextrank",
            "code_example": [
                "import spacy",
                "import pytextrank",
                "",
                "nlp = spacy.load('en_core_web_sm')",
                "",
                "tr = pytextrank.TextRank()",
                "nlp.add_pipe(tr.PipelineComponent, name='textrank', last=True)",
                "",
                "text = 'Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered.'",
                "doc = nlp(text)",
                "",
                "# examine the top-ranked phrases in the document",
                "for p in doc._.phrases:",
                "    print('{:.4f} {:5d}  {}'.format(p.rank, p.count, p.text))",
                "    print(p.chunks)"
            ],
            "code_language": "python",
            "url": "https://github.com/DerwenAI/pytextrank/wiki",
            "thumb": "https://memegenerator.net/img/instances/66942896.jpg",
            "image": "https://memegenerator.net/img/instances/66942896.jpg",
            "author": "Paco Nathan",
            "author_links": {
                "twitter": "pacoid",
                "github": "ceteri",
                "website": "https://derwen.ai/paco"
            },
            "category": ["pipeline"],
            "tags": ["phrase extraction", "ner", "summarization", "graph algorithms", "textrank"]
        },
        {
            "id": "spacy_syllables",
            "title": "Spacy Syllables",
            "slogan": "Multilingual syllable annotations",
            "description": "Spacy Syllables is a pipeline component that adds multilingual syllable annotations to Tokens. It uses Pyphen under the hood and has support for a long list of languages.",
            "github": "sloev/spacy-syllables",
            "pip": "spacy_syllables",
            "code_example": [
                "import spacy",
                "from spacy_syllables import SpacySyllables",
                "",
                "nlp = spacy.load('en_core_web_sm')",
                "syllables = SpacySyllables(nlp)",
                "nlp.add_pipe(syllables, after='tagger')",
                "",
                "doc = nlp('terribly long')",
                "",
                "data = [",
                "    (token.text, token._.syllables, token._.syllables_count)",
                "    for token in doc",
                "]",
                "",
                "assert data == [",
                "    ('terribly', ['ter', 'ri', 'bly'], 3),",
                "    ('long', ['long'], 1)",
                "]"
            ],
            "thumb": "https://raw.githubusercontent.com/sloev/spacy-syllables/master/logo.png",
            "author": "Johannes Valbjørn",
            "author_links": {
                "github": "sloev"
            },
            "category": ["pipeline"],
            "tags": ["syllables", "multilingual"]
        },
        {
            "id": "gobbli",
            "title": "gobbli",
            "slogan": "Deep learning for text classification doesn't have to be scary",
            "description": "gobbli is a Python library which wraps several modern deep learning models in a uniform interface that makes it easy to evaluate feasibility and conduct analyses. It leverages the abstractive powers of Docker to hide nearly all dependency management and functional differences between models from the user. It also contains an interactive app for exploring text data and evaluating classification models. spaCy's base text classification models, as well as models integrated from `spacy-transformers`, are available in the collection of classification models. In addition, spaCy is used for data augmentation and document embeddings.",
            "url": "https://github.com/rtiinternational/gobbli",
            "github": "rtiinternational/gobbli",
            "pip": "gobbli",
            "thumb": "https://i.postimg.cc/NGpzhrdr/gobbli-lg.png",
            "code_example": [
                "from gobbli.io import PredictInput, TrainInput",
                "from gobbli.model.bert import BERT",
                "",
                "train_input = TrainInput(",
                "    X_train=['This is a training document.', 'This is another training document.'],",
                "    y_train=['0', '1'],",
                "    X_valid=['This is a validation sentence.', 'This is another validation sentence.'],",
                "    y_valid=['1', '0'],",
                ")",
                "",
                "clf = BERT()",
                "",
                "# Set up classifier resources -- Docker image, etc.",
                "clf.build()",
                "",
                "# Train model",
                "train_output = clf.train(train_input)",
                "",
                "predict_input = PredictInput(",
                "    X=['Which class is this document?'],",
                "    labels=train_output.labels,",
                "    checkpoint=train_output.checkpoint,",
                ")",
                "",
                "predict_output = clf.predict(predict_input)"
            ],
            "category": ["standalone"]
        },
        {
            "id": "spacy_fastlang",
            "title": "Spacy FastLang",
            "slogan": "Language detection done fast",
            "description": "Fast language detection using FastText and Spacy.",
            "github": "thomasthiebaud/spacy-fastlang",
            "pip": "spacy_fastlang",
            "code_example": [
                "import spacy",
                "from spacy_fastlang import LanguageDetector",
                "",
                "nlp = spacy.load('en_core_web_sm')",
                "nlp.add_pipe(LanguageDetector())",
                "doc = nlp('Life is like a box of chocolates. You never know what you are gonna get.')",
                "",
                "assert doc._.language == 'en'",
                "assert doc._.language_score >= 0.8"
            ],
            "author": "Thomas Thiebaud",
            "author_links": {
                "github": "thomasthiebaud"
            },
            "category": ["pipeline"]
        },
        {
            "id": "mlflow",
            "title": "MLflow",
            "slogan": "An open source platform for the machine learning lifecycle",
            "description": "MLflow is an open source platform to manage the ML lifecycle, including experimentation, reproducibility, deployment, and a central model registry. MLflow currently offers four components: Tracking, Projects, Models and Registry.",
            "github": "mlflow/mlflow",
            "pip": "mlflow",
            "thumb": "https://www.mlflow.org/docs/latest/_static/MLflow-logo-final-black.png",
            "image": "",
            "url": "https://mlflow.org/",
            "author": "Databricks",
            "author_links": {
                "github": "databricks",
                "twitter": "databricks",
                "website": "https://databricks.com/"
            },
            "category": ["standalone", "apis"],
            "code_example": [
                "import mlflow",
                "import mlflow.spacy",
                "",
                "# MLflow Tracking",
                "nlp = spacy.load('my_best_model_path/output/model-best')",
                "with mlflow.start_run(run_name='Spacy'):",
                "    mlflow.set_tag('model_flavor', 'spacy')",
                "    mlflow.spacy.log_model(spacy_model=nlp, artifact_path='model')",
                "    mlflow.log_metric('accuracy', 0.72)",
                "    my_run_id = mlflow.active_run().info.run_id",
                "",
                "",
                "# MLflow Models",
                "model_uri = f'runs:/{my_run_id}/model'",
                "nlp2 = mlflow.spacy.load_model(model_uri=model_uri)"
            ]
        },
        {
            "id": "pyate",
            "title": "PyATE",
            "slogan": "Python Automated Term Extraction",
            "description": "PyATE is a term extraction library written in Python using spaCy POS tagging with Basic, Combo Basic, C-Value, TermExtractor, and Weirdness.",
            "github": "kevinlu1248/pyate",
            "pip": "pyate",
            "code_example": [
                "import spacy",
                "from pyate.term_extraction_pipeline import TermExtractionPipeline",
                "",
                "nlp = spacy.load('en_core_web_sm')",
                "nlp.add_pipe(TermExtractionPipeline())",
                "# source: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1994795/",
                "string = 'Central to the development of cancer are genetic changes that endow these “cancer cells” with many of the hallmarks of cancer, such as self-sufficient growth and resistance to anti-growth and pro-death signals. However, while the genetic changes that occur within cancer cells themselves, such as activated oncogenes or dysfunctional tumor suppressors, are responsible for many aspects of cancer development, they are not sufficient. Tumor promotion and progression are dependent on ancillary processes provided by cells of the tumor environment but that are not necessarily cancerous themselves. Inflammation has long been associated with the development of cancer. This review will discuss the reflexive relationship between cancer and inflammation with particular focus on how considering the role of inflammation in physiologic processes such as the maintenance of tissue homeostasis and repair may provide a logical framework for understanding the connection between the inflammatory response and cancer.'",
                "",
                "doc = nlp(string)",
                "print(doc._.combo_basic.sort_values(ascending=False).head(5))",
                "\"\"\"",
                "dysfunctional tumor                1.443147",
                "tumor suppressors                  1.443147",
                "genetic changes                    1.386294",
                "cancer cells                       1.386294",
                "dysfunctional tumor suppressors    1.298612",
                "\"\"\""
            ],
            "code_language": "python",
            "url": "https://github.com/kevinlu1248/pyate",
            "author": "Kevin Lu",
            "author_links": {
                "twitter": "kevinlu1248",
                "github": "kevinlu1248",
                "website": "https://github.com/kevinlu1248/pyate"
            },
            "category": ["pipeline", "research"],
            "tags": ["term_extraction"]
        },
        {
            "id": "contextualSpellCheck",
            "title": "Contextual Spell Check",
            "slogan": "Contextual spell correction using BERT (bidirectional representations)",
            "description": "This package currently focuses on Out of Vocabulary (OOV) word or non-word error (NWE) correction using a BERT model. The idea of using BERT was to use the context when correcting NWE. In the coming days, I would like to focus on real word errors (RWE) and on optimising the package by implementing it in Cython.",
            "github": "R1j1t/contextualSpellCheck",
            "pip": "contextualSpellCheck",
            "code_example": [
                "import spacy",
                "import contextualSpellCheck",
                "",
                "nlp = spacy.load('en')",
                "contextualSpellCheck.add_to_pipe(nlp)",
                "doc = nlp('Income was $9.4 milion compared to the prior year of $2.7 milion.')",
                "",
                "print(doc._.performed_spellCheck) #Should be True",
                "print(doc._.outcome_spellCheck) #Income was $9.4 million compared to the prior year of $2.7 million."
            ],
            "code_language": "python",
            "url": "https://github.com/R1j1t/contextualSpellCheck",
            "thumb": "https://user-images.githubusercontent.com/22280243/82760949-98e68480-9e14-11ea-952e-4738620fd9e3.png",
            "image": "https://user-images.githubusercontent.com/22280243/82138959-2852cd00-9842-11ea-918a-49b2a7873ef6.png",
            "author": "Rajat Goel",
            "author_links": {
                "github": "r1j1t",
                "website": "https://github.com/R1j1t"
            },
            "category": ["pipeline", "conversational", "research"],
            "tags": ["spell check", "correction", "preprocessing", "translation"]
        },
        {
            "id": "texthero",
            "title": "Texthero",
            "slogan": "Text preprocessing, representation and visualization from zero to hero.",
            "description": "Texthero is a python package to work with text data efficiently. It empowers NLP developers with a tool to quickly understand any text-based dataset and it provides a solid pipeline to clean and represent text data, from zero to hero.",
            "github": "jbesomi/texthero",
            "pip": "texthero",
            "code_example": [
                "import texthero as hero",
                "import pandas as pd",
                "",
                "df = pd.read_csv('https://github.com/jbesomi/texthero/raw/master/dataset/bbcsport.csv')",
                "df['named_entities'] = hero.named_entities(df['text'])",
                "df.head()"
            ],
            "code_language": "python",
            "url": "https://texthero.org",
            "thumb": "https://texthero.org/img/T.png",
            "image": "https://texthero.org/docs/assets/texthero.png",
            "author": "Jonathan Besomi",
            "author_links": {
                "github": "jbesomi",
                "website": "https://besomi.ai"
            },
            "category": ["standalone"]
        },
        {
            "id": "cov-bsv",
            "title": "VA COVID-19 NLP BSV",
            "slogan": "spaCy pipeline for COVID-19 surveillance.",
            "github": "abchapman93/VA_COVID-19_NLP_BSV",
            "description": "A spaCy rule-based pipeline for identifying positive cases of COVID-19 from clinical text. A version of this system was deployed as part of the US Department of Veterans Affairs biosurveillance response to COVID-19.",
            "pip": "cov-bsv",
            "code_example": [
              "import cov_bsv",
              "",
              "nlp = cov_bsv.load()",
              "text = 'Pt tested for COVID-19. His wife was recently diagnosed with novel coronavirus. SARS-COV-2: Detected'",
              "doc = nlp(text)",
              "",
              "print(doc.ents)",
              "print(doc._.cov_classification)",
              "cov_bsv.visualize_doc(doc)"
            ],
            "category": ["pipeline", "standalone", "biomedical", "scientific"],
            "tags": ["clinical", "epidemiology", "covid-19", "surveillance"],
            "author": "Alec Chapman",
            "author_links": {
                "github": "abchapman93"
            }
        }
    ],

    "categories": [
        {
            "label": "Projects",
            "items": [
                {
                    "id": "pipeline",
                    "title": "Pipeline",
                    "description": "Custom pipeline components and extensions"
                },
                {
                    "id": "training",
                    "title": "Training",
                    "description": "Helpers and toolkits for training spaCy models"
                },
                {
                    "id": "conversational",
                    "title": "Conversational",
                    "description": "Frameworks and utilities for working with conversational text, e.g. for chat bots"
                },
                {
                    "id": "research",
                    "title": "Research",
                    "description": "Frameworks and utilities for developing better NLP models, especially using neural networks"
                },
                {
                    "id": "scientific",
                    "title": "Scientific",
                    "description": "Frameworks and utilities for scientific text processing"
                },
                {
                    "id": "visualizers",
                    "title": "Visualizers",
                    "description": "Demos and tools to visualize NLP annotations or systems"
                },
                {
                    "id": "apis",
                    "title": "Containers & APIs",
                    "description": "Infrastructure tools for managing or deploying spaCy"
                },
                {
                    "id": "nonpython",
                    "title": "Non-Python",
                    "description": "Wrappers, bindings and implementations in other programming languages"
                },
                {
                    "id": "standalone",
                    "title": "Standalone",
                    "description": "Self-contained libraries or tools that use spaCy under the hood"
                },
                {
                    "id": "models",
                    "title": "Models",
                    "description": "Third-party pretrained models for different languages and domains"
                }
            ]
        },
        {
            "label": "Education",
            "items": [
                {
                    "id": "books",
                    "title": "Books",
                    "description": "Books about or featuring spaCy"
                },
                {
                    "id": "courses",
                    "title": "Courses",
                    "description": "Online courses and interactive tutorials"
                },
                {
                    "id": "videos",
                    "title": "Videos",
                    "description": "Talks and tutorials in video format"
                },
                {
                    "id": "podcasts",
                    "title": "Podcasts",
                    "description": "Episodes about spaCy or interviews with the spaCy team"
                }
            ]
        }
    ]
}
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								{
 								    "resources": [
-												Fix and update universe.json [ci skip]

											
										
										
											2020-07-07 22:12:28 +03:00
+								        {
 								            "id": "spacy-streamlit",
 								            "title": "spacy-streamlit",
 								            "slogan": "spaCy building blocks for Streamlit apps",
 								            "github": "explosion/spacy-streamlit",
 								            "description": "This package contains utilities for visualizing spaCy models and building interactive spaCy-powered apps with [Streamlit](https://streamlit.io). It includes various building blocks you can use in your own Streamlit app, like visualizers for **syntactic dependencies**, **named entities**, **text classification**, **semantic similarity** via word vectors, token attributes, and more.",
 								            "pip": "spacy-streamlit",
 								            "category": ["visualizers"],
 								            "thumb": "https://i.imgur.com/mhEjluE.jpg",
 								            "image": "https://user-images.githubusercontent.com/13643239/85388081-f2da8700-b545-11ea-9bd4-e303d3c5763c.png",
 								            "code_example": [
 								                "import spacy_streamlit",
 								                "",
 								                "models = [\"en_core_web_sm\", \"en_core_web_md\"]",
 								                "default_text = \"Sundar Pichai is the CEO of Google.\"",
 								                "spacy_streamlit.visualize(models, default_text)"
 								            ],
 								            "author": "Ines Montani",
 								            "author_links": {
 								                "twitter": "_inesmontani",
 								                "github": "ines",
 								                "website": "https://ines.io"
 								            }
 								        },
-												Adding spaczz package to universe.json (#5717)

* Adding spaczz package to universe.json

* Adding contributor agreement.
											
										
										
											2020-07-07 21:55:24 +03:00
+								        {
 								            "id": "spaczz",
 								            "title": "spaczz",
 								            "slogan": "Fuzzy matching and more for spaCy.",
 								            "description": "Spaczz provides fuzzy matching and multi-token regex matching functionality for spaCy. Spaczz's components have similar APIs to their spaCy counterparts and spaczz pipeline components can integrate into spaCy pipelines where they can be saved/loaded as models.",
 								            "github": "gandersen101/spaczz",
 								            "pip": "spaczz",
 								            "code_example": [
 								                "import spacy",
 								                "from spaczz.pipeline import SpaczzRuler",
 								                "",
 								                "nlp = spacy.blank('en')",
 								                "ruler = SpaczzRuler(nlp)",
 								                "ruler.add_patterns([{'label': 'PERSON', 'pattern': 'Bill Gates', 'type': 'fuzzy'}])",
 								                "nlp.add_pipe(ruler)",
 								                "",
-												Fix quote issue in spaczz universe.json

											
										
										
											2020-07-08 03:16:28 +03:00
+								                "doc = nlp('Oops, I spelled Bill Gatez wrong.')",
-												Adding spaczz package to universe.json (#5717)

* Adding spaczz package to universe.json

* Adding contributor agreement.
											
										
										
											2020-07-07 21:55:24 +03:00
+								                "print([(ent.text, ent.start, ent.end, ent.label_) for ent in doc.ents])"
 								            ],
 								            "code_language": "python",
 								            "url": "https://spaczz.readthedocs.io/en/latest/",
 								            "author": "Grant Andersen",
 								            "author_links": {
 								                "twitter": "gandersen101",
 								                "github": "gandersen101"
 								            },
 								            "category": ["pipeline"],
 								            "tags": ["fuzzy-matching", "regex"]
 								        },
-												adding spacy-universal-sentence-encoder (#5534)

* adding spacy-universal-sentence-encoder

* update affiliation

* updated code example
											
										
										
											2020-06-08 21:26:30 +03:00
+								        {
 								            "id": "spacy-universal-sentence-encoder",
 								            "title": "SpaCy - Universal Sentence Encoder",
 								            "slogan": "Make use of Google's Universal Sentence Encoder directly within SpaCy",
 								            "description": "This library lets you use Universal Sentence Encoder embeddings of Docs, Spans and Tokens directly from TensorFlow Hub",
 								            "github": "MartinoMensio/spacy-universal-sentence-encoder-tfhub",
 								            "code_example": [
 								                "import spacy_universal_sentence_encoder",
 								                "# load one of the models: ['en_use_md', 'en_use_lg', 'xx_use_md', 'xx_use_lg']",
 								                "nlp = spacy_universal_sentence_encoder.load_model('en_use_lg')",
 								                "# get two documents",
 								                "doc_1 = nlp('Hi there, how are you?')",
 								                "doc_2 = nlp('Hello there, how are you doing today?')",
 								                "# use the similarity method that is based on the vectors, on Doc, Span or Token",
 								                "print(doc_1.similarity(doc_2[0:7]))"
 								            ],
 								            "category": ["models", "pipeline"],
 								            "author": "Martino Mensio",
 								            "author_links": {
 								                "twitter": "MartinoMensio",
 								                "github": "MartinoMensio",
 								                "website": "https://martinomensio.github.io"
 								            }
 								        },
-												add "whatlies" to spaCy universe (#5252)

* Add "whatlies"

We're releasing it on our side officially on the 16th of April. If possible, let's announce around the same time :)

* sign contributor thing

* Added fancy gif

as the image

* Update universe.json

Spellin error and spaCy clarification.
											
										
										
											2020-04-06 12:29:30 +03:00
+								        {
 								            "id": "whatlies",
 								            "title": "whatlies",
 								            "slogan": "Make interactive visualisations to figure out 'what lies' in word embeddings.",
 								            "description": "This small library offers tools to make visualisation easier of both word embeddings as well as operations on them. It has support for spaCy prebuilt models as a first class citizen but also offers support for sense2vec. There's a convenient API to perform linear algebra as well as support for popular transformations like PCA/UMAP/etc.",
 								            "github": "rasahq/whatlies",
 								            "pip": "whatlies",
 								            "thumb": "https://i.imgur.com/rOkOiLv.png",
 								            "image": "https://raw.githubusercontent.com/RasaHQ/whatlies/master/docs/gif-two.gif",
 								            "code_example": [
 								                "from whatlies import EmbeddingSet",
 								                "from whatlies.language import SpacyLanguage",
 								                "",
 								                "lang = SpacyLanguage('en_core_web_md')",
-												fix json (#5267)


											
										
										
											2020-04-08 13:58:09 +03:00
+								                "words = ['cat', 'dog', 'fish', 'kitten', 'man', 'woman', 'king', 'queen', 'doctor', 'nurse']",
-												add "whatlies" to spaCy universe (#5252)

* Add "whatlies"

We're releasing it on our side officially on the 16th of April. If possible, let's announce around the same time :)

* sign contributor thing

* Added fancy gif

as the image

* Update universe.json

Spellin error and spaCy clarification.
											
										
										
											2020-04-06 12:29:30 +03:00
+								                "",
 								                "emb = lang[words]",
 								                "emb.plot_interactive(x_axis='man', y_axis='woman')"
 								            ],
 								            "category": ["visualizers", "research"],
 								            "author": "Vincent D. Warmerdam",
 								            "author_links": {
 								                "twitter": "fishnets88",
 								                "github": "koaning",
 								                "website": "https://koaning.io"
 								            }
 								        },
-												Update universe.json [ci skip]

											
										
										
											2020-03-17 21:53:31 +03:00
+								        {
 								            "id": "spacy-stanza",
 								            "title": "spacy-stanza",
 								            "slogan": "Use the latest Stanza (StanfordNLP) research models directly in spaCy",
 								            "description": "This package wraps the Stanza (formerly StanfordNLP) library, so you can use Stanford's models as a spaCy pipeline. Using this wrapper, you'll be able to use the following annotations, computed by your pretrained `stanza` model:\n\n- Statistical tokenization (reflected in the `Doc` and its tokens)\n - Lemmatization (`token.lemma` and `token.lemma_`)\n - Part-of-speech tagging (`token.tag`, `token.tag_`, `token.pos`, `token.pos_`)\n - Dependency parsing (`token.dep`, `token.dep_`, `token.head`)\n - Named entity recognition (`doc.ents`, `token.ent_type`, `token.ent_type_`, `token.ent_iob`, `token.ent_iob_`)\n - Sentence segmentation (`doc.sents`)",
 								            "github": "explosion/spacy-stanza",
-												Update universe.json [ci skip]

											
										
										
											2020-03-18 00:21:34 +03:00
+								            "pip": "spacy-stanza",
-												Update universe.json [ci skip]

											
										
										
											2020-03-17 21:53:31 +03:00
+								            "thumb": "https://i.imgur.com/myhLjMJ.png",
 								            "code_example": [
 								                "import stanza",
 								                "from spacy_stanza import StanzaLanguage",
 								                "",
 								                "snlp = stanza.Pipeline(lang=\"en\")",
 								                "nlp = StanzaLanguage(snlp)",
 								                "",
 								                "doc = nlp(\"Barack Obama was born in Hawaii. He was elected president in 2008.\")",
 								                "for token in doc:",
 								                "    print(token.text, token.lemma_, token.pos_, token.dep_, token.ent_type_)",
 								                "print(doc.ents)"
 								            ],
 								            "category": ["pipeline", "standalone", "models", "research"],
 								            "author": "Explosion",
 								            "author_links": {
 								                "twitter": "explosion_ai",
 								                "github": "explosion",
 								                "website": "https://explosion.ai"
 								            }
 								        },
-												Add "spaCy Server" to spaCy Universe (#4553)

* Add "spaCy Server" to spaCy Universe

* Accept the spaCy Contributor Agreement

											
										
										
											2019-10-30 15:20:46 +03:00
+								        {
 								            "id": "spacy-server",
 								            "title": "spaCy Server",
 								            "slogan": "\uD83E\uDD9C Containerized HTTP API for spaCy NLP",
 								            "description": "For developers who need programming language agnostic NLP, spaCy Server is a containerized HTTP API that provides industrial-strength natural language processing. Unlike other servers, our server is fast, idiomatic, and well documented.",
 								            "github": "neelkamath/spacy-server",
 								            "code_example": [
 								                "docker run --rm -dp 8080:8080 neelkamath/spacy-server",
 								                "curl http://localhost:8080/ner -H 'Content-Type: application/json' -d '{\"sections\": [\"My name is John Doe. I grew up in California.\"]}'"
 								            ],
 								            "code_language": "shell",
 								            "url": "https://hub.docker.com/r/neelkamath/spacy-server",
 								            "author": "Neel Kamath",
 								            "author_links": {
 								                "github": "neelkamath",
 								                "website": "https://neelkamath.com"
 								            },
 								            "category": ["apis"],
 								            "tags": ["docker"]
 								        },
-												Add multiple packages to universe.json (#3809) [ci skip]

* Add multiple packages to universe.json

Added following packages: NLPArchitect, NLPRe, Chatterbot, alibi, NeuroNER

* Auto-format

* Update slogan (probably just copy-paste mistake)

* Adjust formatting

* Update tags / categories

											
										
										
											2019-06-02 13:35:52 +03:00
+								        {
 								            "id": "nlp-architect",
 								            "title": "NLP Architect",
 								            "slogan": "Python lib for exploring Deep NLP & NLU by Intel AI",
 								            "github": "NervanaSystems/nlp-architect",
 								            "pip": "nlp-architect",
-												Tidy up universe [ci skip]

											
										
										
											2019-06-02 13:38:48 +03:00
+								            "thumb": "https://i.imgur.com/vMideRx.png",
-												Add multiple packages to universe.json (#3809) [ci skip]

* Add multiple packages to universe.json

Added following packages: NLPArchitect, NLPRe, Chatterbot, alibi, NeuroNER

* Auto-format

* Update slogan (probably just copy-paste mistake)

* Adjust formatting

* Update tags / categories

											
										
										
											2019-06-02 13:35:52 +03:00
+								            "category": ["standalone", "research"],
 								            "tags": ["pytorch"]
 								        },
 								        {
 								            "id": "NeuroNER",
 								            "title": "NeuroNER",
 								            "slogan": "Named-entity recognition using neural networks",
 								            "github": "Franck-Dernoncourt/NeuroNER",
 								            "pip": "pyneuroner[cpu]",
 								            "code_example": [
 								                "from neuroner import neuromodel",
 								                "nn = neuromodel.NeuroNER(train_model=False, use_pretrained_model=True)"
 								            ],
 								            "category": ["ner"],
 								            "tags": ["standalone"]
 								        },
 								        {
 								            "id": "NLPre",
 								            "title": "NLPre",
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								            "slogan": "Natural Language Preprocessing Library for health data and more",
-												Add multiple packages to universe.json (#3809) [ci skip]

* Add multiple packages to universe.json

Added following packages: NLPArchitect, NLPRe, Chatterbot, alibi, NeuroNER

* Auto-format

* Update slogan (probably just copy-paste mistake)

* Adjust formatting

* Update tags / categories

											
										
										
											2019-06-02 13:35:52 +03:00
+								            "github": "NIHOPA/NLPre",
 								            "pip": "nlpre",
 								            "code_example": [
 								                "from nlpre import titlecaps, dedash, identify_parenthetical_phrases",
 								                "from nlpre import replace_acronyms, replace_from_dictionary",
 								                "ABBR = identify_parenthetical_phrases()(text)",
 								                "parsers = [dedash(), titlecaps(), replace_acronyms(ABBR),",
 								                "        replace_from_dictionary(prefix='MeSH_')]",
 								                "for f in parsers:",
 								                "    text = f(text)",
 								                "print(text)"
 								            ],
-												Added author information for NLPre (#5414)

* Add author links for NLPre and update category

* Add contributor statement
											
										
										
											2020-05-08 12:28:54 +03:00
+								            "category": ["scientific", "biomedical"],
-												Update universe and display of videos [ci skip]

											
										
										
											2020-05-21 22:54:23 +03:00
+								            "author": "Travis Hoppe",
-												Added author information for NLPre (#5414)

* Add author links for NLPre and update category

* Add contributor statement
											
										
										
											2020-05-08 12:28:54 +03:00
+								            "author_links": {
 								                "github": "thoppe",
-												Update universe and display of videos [ci skip]

											
										
										
											2020-05-21 22:54:23 +03:00
+								                "twitter": "metasemantic",
 								                "website": "http://thoppe.github.io/"
-												Added author information for NLPre (#5414)

* Add author links for NLPre and update category

* Add contributor statement
											
										
										
											2020-05-08 12:28:54 +03:00
+								            }
-												Add multiple packages to universe.json (#3809) [ci skip]

* Add multiple packages to universe.json

Added following packages: NLPArchitect, NLPRe, Chatterbot, alibi, NeuroNER

* Auto-format

* Update slogan (probably just copy-paste mistake)

* Adjust formatting

* Update tags / categories

											
										
										
											2019-06-02 13:35:52 +03:00
+								        },
 								        {
 								            "id": "Chatterbot",
 								            "title": "Chatterbot",
 								            "slogan": "A machine-learning based conversational dialog engine for creating chat bots",
 								            "github": "gunthercox/ChatterBot",
 								            "pip": "chatterbot",
-												Tidy up universe [ci skip]

											
										
										
											2019-06-02 13:38:48 +03:00
+								            "thumb": "https://i.imgur.com/eyAhwXk.jpg",
-												Add multiple packages to universe.json (#3809) [ci skip]

* Add multiple packages to universe.json

Added following packages: NLPArchitect, NLPRe, Chatterbot, alibi, NeuroNER

* Auto-format

* Update slogan (probably just copy-paste mistake)

* Adjust formatting

* Update tags / categories

											
										
										
											2019-06-02 13:35:52 +03:00
+								            "code_example": [
 								                "from chatterbot import ChatBot",
 								                "from chatterbot.trainers import ListTrainer",
 								                "# Create a new chat bot named Charlie",
 								                "chatbot = ChatBot('Charlie')",
 								                "trainer = ListTrainer(chatbot)",
 								                "trainer.train([",
 								                "'Hi, can I help you?',",
 								                "'Sure, I would like to book a flight to Iceland.',",
 								                "'Your flight has been booked.'",
 								                "])",
 								                "",
 								                "response = chatbot.get_response('I would like to book a flight.')"
 								            ],
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								            "author": "Gunther Cox",
 								            "author_links": {
 								                "github": "gunthercox"
 								            },
-												Add multiple packages to universe.json (#3809) [ci skip]

* Add multiple packages to universe.json

Added following packages: NLPArchitect, NLPRe, Chatterbot, alibi, NeuroNER

* Auto-format

* Update slogan (probably just copy-paste mistake)

* Adjust formatting

* Update tags / categories

											
										
										
											2019-06-02 13:35:52 +03:00
+								            "category": ["conversational", "standalone"],
 								            "tags": ["chatbots"]
 								        },
-												Add Baderlab/saber to universe.json (#3806)


											
										
										
											2019-06-01 18:36:40 +03:00
+								        {
 								            "id": "saber",
 								            "title": "saber",
-												Add multiple packages to universe.json (#3809) [ci skip]

* Add multiple packages to universe.json

Added following packages: NLPArchitect, NLPRe, Chatterbot, alibi, NeuroNER

* Auto-format

* Update slogan (probably just copy-paste mistake)

* Adjust formatting

* Update tags / categories

											
										
										
											2019-06-02 13:35:52 +03:00
+								            "slogan": "Deep-learning based tool for information extraction in the biomedical domain",
-												Add Baderlab/saber to universe.json (#3806)


											
										
										
											2019-06-01 18:36:40 +03:00
+								            "github": "BaderLab/saber",
 								            "pip": "saber",
 								            "thumb": "https://raw.githubusercontent.com/BaderLab/saber/master/docs/img/saber_logo.png",
 								            "code_example": [
-												Add multiple packages to universe.json (#3809) [ci skip]

* Add multiple packages to universe.json

Added following packages: NLPArchitect, NLPRe, Chatterbot, alibi, NeuroNER

* Auto-format

* Update slogan (probably just copy-paste mistake)

* Adjust formatting

* Update tags / categories

											
										
										
											2019-06-02 13:35:52 +03:00
+								                "from saber.saber import Saber",
 								                "saber = Saber()",
 								                "saber.load('PRGE')",
-												Add Baderlab/saber to universe.json (#3806)


											
										
										
											2019-06-01 18:36:40 +03:00
+								                "saber.annotate('The phosphorylation of Hdm2 by MK2 promotes the ubiquitination of p53.')"
 								            ],
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								            "author": "Bader Lab, University of Toronto",
 								            "category": ["scientific"],
-												Add multiple packages to universe.json (#3809) [ci skip]

* Add multiple packages to universe.json

Added following packages: NLPArchitect, NLPRe, Chatterbot, alibi, NeuroNER

* Auto-format

* Update slogan (probably just copy-paste mistake)

* Adjust formatting

* Update tags / categories

											
										
										
											2019-06-02 13:35:52 +03:00
+								            "tags": ["keras", "biomedical"]
 								        },
 								        {
 								            "id": "alibi",
 								            "title": "alibi",
 								            "slogan": "Algorithms for monitoring and explaining machine learning models",
 								            "github": "SeldonIO/alibi",
 								            "pip": "alibi",
-												Tidy up universe [ci skip]

											
										
										
											2019-06-02 13:38:48 +03:00
+								            "thumb": "https://i.imgur.com/YkzQHRp.png",
-												Add multiple packages to universe.json (#3809) [ci skip]

* Add multiple packages to universe.json

Added following packages: NLPArchitect, NLPRe, Chatterbot, alibi, NeuroNER

* Auto-format

* Update slogan (probably just copy-paste mistake)

* Adjust formatting

* Update tags / categories

											
										
										
											2019-06-02 13:35:52 +03:00
+								            "code_example": [
-												Tidy up universe [ci skip]

											
										
										
											2019-06-02 13:38:48 +03:00
+								                "from alibi.explainers import AnchorTabular",
 								                "explainer = AnchorTabular(predict_fn, feature_names)",
 								                "explainer.fit(X_train)",
 								                "explainer.explain(x)"
-												Add multiple packages to universe.json (#3809) [ci skip]

* Add multiple packages to universe.json

Added following packages: NLPArchitect, NLPRe, Chatterbot, alibi, NeuroNER

* Auto-format

* Update slogan (probably just copy-paste mistake)

* Adjust formatting

* Update tags / categories

											
										
										
											2019-06-02 13:35:52 +03:00
+								            ],
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								            "author": "Seldon",
-												Tidy up universe [ci skip]

											
										
										
											2019-06-02 13:38:48 +03:00
+								            "category": ["standalone", "research"]
-												Add Baderlab/saber to universe.json (#3806)


											
										
										
											2019-06-01 18:36:40 +03:00
+								        },
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								        {
 								            "id": "spacymoji",
 								            "slogan": "Emoji handling and meta data as a spaCy pipeline component",
 								            "github": "ines/spacymoji",
 								            "description": "spaCy v2.0 extension and pipeline component for adding emoji meta data to `Doc` objects. Detects emoji consisting of one or more unicode characters, and can optionally merge multi-char emoji (combined pictures, emoji with skin tone modifiers) into one token. Human-readable emoji descriptions are added as a custom attribute, and an optional lookup table can be provided for your own descriptions. The extension sets the custom `Doc`, `Token` and `Span` attributes `._.is_emoji`, `._.emoji_desc`, `._.has_emoji` and `._.emoji`.",
 								            "pip": "spacymoji",
 								            "category": ["pipeline"],
 								            "tags": ["emoji", "unicode"],
 								            "thumb": "https://i.imgur.com/XOTYIgn.jpg",
 								            "code_example": [
 								                "import spacy",
 								                "from spacymoji import Emoji",
 								                "",
 								                "nlp = spacy.load('en')",
 								                "emoji = Emoji(nlp)",
 								                "nlp.add_pipe(emoji, first=True)",
 								                "",
-												Remove u-strings and fix formatting [ci skip]

											
										
										
											2019-09-12 17:11:15 +03:00
+								                "doc = nlp('This is a test 😻 👍🏿')",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								                "assert doc._.has_emoji == True",
 								                "assert doc[2:5]._.has_emoji == True",
 								                "assert doc[0]._.is_emoji == False",
 								                "assert doc[4]._.is_emoji == True",
-												Remove u-strings and fix formatting [ci skip]

											
										
										
											2019-09-12 17:11:15 +03:00
+								                "assert doc[5]._.emoji_desc == 'thumbs up dark skin tone'",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								                "assert len(doc._.emoji) == 2",
-												Remove u-strings and fix formatting [ci skip]

											
										
										
											2019-09-12 17:11:15 +03:00
+								                "assert doc._.emoji[1] == ('👍🏿', 5, 'thumbs up dark skin tone')"
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            ],
 								            "author": "Ines Montani",
 								            "author_links": {
 								                "twitter": "_inesmontani",
 								                "github": "ines",
 								                "website": "https://ines.io"
 								            }
 								        },
 								        {
 								            "id": "spacy_hunspell",
 								            "slogan": "Add spellchecking and spelling suggestions to your spaCy pipeline using Hunspell",
 								            "description": "This package uses the [spaCy 2.0 extensions](https://spacy.io/usage/processing-pipelines#extensions) to add [Hunspell](http://hunspell.github.io) support for spellchecking.",
 								            "github": "tokestermw/spacy_hunspell",
 								            "pip": "spacy_hunspell",
 								            "code_example": [
 								                "import spacy",
 								                "from spacy_hunspell import spaCyHunSpell",
 								                "",
 								                "nlp = spacy.load('en_core_web_sm')",
 								                "hunspell = spaCyHunSpell(nlp, 'mac')",
 								                "nlp.add_pipe(hunspell)",
 								                "doc = nlp('I can haz cheezeburger.')",
 								                "haz = doc[2]",
 								                "haz._.hunspell_spell  # False",
 								                "haz._.hunspell_suggest  # ['ha', 'haze', 'hazy', 'has', 'hat', 'had', 'hag', 'ham', 'hap', 'hay', 'haw', 'ha z']"
 								            ],
 								            "author": "Motoki Wu",
 								            "author_links": {
 								                "github": "tokestermw",
 								                "twitter": "plusepsilon"
 								            },
 								            "category": ["pipeline"],
 								            "tags": ["spellcheck"]
 								        },
 								        {
 								            "id": "spacy_grammar",
 								            "slogan": "Language Tool style grammar handling with spaCy",
 								            "description": "This package leverages the [Matcher API](https://spacy.io/docs/usage/rule-based-matching) in spaCy to quickly match on spaCy tokens not dissimilar to regex. It reads a `grammar.yml` file to load up custom patterns and returns the results inside `Doc`, `Span`, and `Token`. It is extensible through adding rules to `grammar.yml` (though currently only the simple string matching is implemented).",
 								            "github": "tokestermw/spacy_grammar",
 								            "code_example": [
 								                "import spacy",
 								                "from spacy_grammar.grammar import Grammar",
 								                "",
 								                "nlp = spacy.load('en')",
 								                "grammar = Grammar(nlp)",
 								                "nlp.add_pipe(grammar)",
 								                "doc = nlp('I can haz cheeseburger.')",
 								                "doc._.has_grammar_error  # True"
 								            ],
 								            "author": "Motoki Wu",
 								            "author_links": {
 								                "github": "tokestermw",
 								                "twitter": "plusepsilon"
 								            },
 								            "category": ["pipeline"]
 								        },
 								        {
 								            "id": "spacy_kenlm",
 								            "slogan": "KenLM extension for spaCy 2.0",
 								            "github": "tokestermw/spacy_kenlm",
 								            "pip": "spacy_kenlm",
 								            "code_example": [
 								                "import spacy",
 								                "from spacy_kenlm import spaCyKenLM",
 								                "",
 								                "nlp = spacy.load('en_core_web_sm')",
 								                "spacy_kenlm = spaCyKenLM()  # default model from test.arpa",
 								                "nlp.add_pipe(spacy_kenlm)",
 								                "doc = nlp('How are you?')",
 								                "doc._.kenlm_score # doc score",
 								                "doc[:2]._.kenlm_score # span score",
 								                "doc[2]._.kenlm_score # token score"
 								            ],
 								            "author": "Motoki Wu",
 								            "author_links": {
 								                "github": "tokestermw",
 								                "twitter": "plusepsilon"
 								            },
 								            "category": ["pipeline"]
 								        },
 								        {
 								            "id": "spacy_readability",
 								            "slogan": "Add text readability meta data to Doc objects",
 								            "description": "spaCy v2.0 pipeline component for calculating readability scores of text. Provides scores for Flesch-Kincaid grade level, Flesch-Kincaid reading ease, and Dale-Chall.",
 								            "github": "mholtzscher/spacy_readability",
 								            "pip": "spacy-readability",
 								            "code_example": [
 								                "import spacy",
 								                "from spacy_readability import Readability",
 								                "",
 								                "nlp = spacy.load('en')",
 								                "read = Readability(nlp)",
 								                "nlp.add_pipe(read, last=True)",
 								                "doc = nlp(\"I am some really difficult text to read because I use obnoxiously large words.\")",
 								                "doc._.flesch_kincaid_grade_level",
 								                "doc._.flesch_kincaid_reading_ease",
 								                "doc._.dale_chall"
 								            ],
 								            "author": "Michael Holtzscher",
 								            "author_links": {
 								                "github": "mholtzscher"
 								            },
 								            "category": ["pipeline"]
 								        },
 								        {
 								            "id": "spacy-sentence-segmenter",
 								            "title": "Sentence Segmenter",
 								            "slogan": "Custom sentence segmentation for spaCy",
 								            "code_example": [
 								                "from seg.newline.segmenter import NewLineSegmenter",
 								                "import spacy",
 								                "",
 								                "nlseg = NewLineSegmenter()",
 								                "nlp = spacy.load('en')",
 								                "nlp.add_pipe(nlseg.set_sent_starts, name='sentence_segmenter', before='parser')",
 								                "doc = nlp(my_doc_text)"
 								            ],
 								            "author": "tc64",
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								            "author_links": {
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								                "github": "tc64"
 								            },
 								            "category": ["pipeline"]
 								        },
 								        {
 								            "id": "spacy_cld",
 								            "title": "spaCy-CLD",
 								            "slogan": "Add language detection to your spaCy pipeline using CLD2",
 								            "description": "spaCy-CLD operates on `Doc` and `Span` spaCy objects. When called on a `Doc` or `Span`, the object is given two attributes: `languages` (a list of up to 3 language codes) and `language_scores` (a dictionary mapping language codes to confidence scores between 0 and 1).\n\nspacy-cld is a little extension that wraps the [PYCLD2](https://github.com/aboSamoor/pycld2) Python library, which in turn wraps the [Compact Language Detector 2](https://github.com/CLD2Owners/cld2) C library originally built at Google for the Chromium project. CLD2 uses character n-grams as features and a Naive Bayes classifier to identify 80+ languages from Unicode text strings (or XML/HTML). It can detect up to 3 different languages in a given document, and reports a confidence score with each language.",
 								            "github": "nickdavidhaynes/spacy-cld",
 								            "pip": "spacy_cld",
 								            "code_example": [
 								                "import spacy",
 								                "from spacy_cld import LanguageDetector",
 								                "",
 								                "nlp = spacy.load('en')",
 								                "language_detector = LanguageDetector()",
 								                "nlp.add_pipe(language_detector)",
 								                "doc = nlp('This is some English text.')",
 								                "",
 								                "doc._.languages  # ['en']",
 								                "doc._.language_scores['en']  # 0.96"
 								            ],
 								            "author": "Nicholas D Haynes",
 								            "author_links": {
 								                "github": "nickdavidhaynes"
 								            },
 								            "category": ["pipeline"]
 								        },
 								        {
 								            "id": "spacy-lookup",
 								            "slogan": "A powerful entity matcher for very large dictionaries, using the FlashText module",
 								            "description": "spaCy v2.0 extension and pipeline component for adding Named Entities metadata to `Doc` objects. Detects Named Entities using dictionaries. The extension sets the custom `Doc`, `Token` and `Span` attributes `._.is_entity`, `._.entity_type`, `._.has_entities` and `._.entities`. Named Entities are matched using the python module `flashtext`, and looked up in the data provided by different dictionaries.",
 								            "github": "mpuig/spacy-lookup",
 								            "pip": "spacy-lookup",
 								            "code_example": [
 								                "import spacy",
 								                "from spacy_lookup import Entity",
 								                "",
 								                "nlp = spacy.load('en')",
-												Correct error in spacy universe docs concerning spacy-lookup (#2814)


											
										
										
											2018-10-01 11:24:50 +03:00
+								                "entity = Entity(keywords_list=['python', 'java platform'])",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								                "nlp.add_pipe(entity, last=True)",
 								                "",
-												Remove u string and auto-format [ci skip]

											
										
										
											2020-04-29 13:54:57 +03:00
+								                "doc = nlp(\"I am a product manager for a java and python.\")",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								                "assert doc._.has_entities == True",
 								                "assert doc[2:5]._.has_entities == True",
 								                "assert doc[0]._.is_entity == False",
 								                "assert doc[3]._.is_entity == True",
 								                "print(doc._.entities)"
 								            ],
 								            "author": "Marc Puig",
 								            "author_links": {
 								                "github": "mpuig"
 								            },
 								            "category": ["pipeline"]
 								        },
 								        {
 								            "id": "spacy-iwnlp",
 								            "slogan": "German lemmatization with IWNLP",
 								            "description": "This package uses the [spaCy 2.0 extensions](https://spacy.io/usage/processing-pipelines#extensions) to add [IWNLP-py](https://github.com/Liebeck/iwnlp-py) as German lemmatizer directly into your spaCy pipeline.",
 								            "github": "Liebeck/spacy-iwnlp",
 								            "pip": "spacy-iwnlp",
 								            "code_example": [
 								                "import spacy",
 								                "from spacy_iwnlp import spaCyIWNLP",
 								                "",
 								                "nlp = spacy.load('de')",
 								                "iwnlp = spaCyIWNLP(lemmatizer_path='data/IWNLP.Lemmatizer_20170501.json')",
 								                "nlp.add_pipe(iwnlp)",
 								                "doc = nlp('Wir mögen Fußballspiele mit ausgedehnten Verlängerungen.')",
 								                "for token in doc:",
 								                "    print('POS: {}\tIWNLP:{}'.format(token.pos_, token._.iwnlp_lemmas))"
 								            ],
 								            "author": "Matthias Liebeck",
 								            "author_links": {
 								                "github": "Liebeck"
 								            },
 								            "category": ["pipeline"],
 								            "tags": ["lemmatizer", "german"]
 								        },
 								        {
 								            "id": "spacy-sentiws",
 								            "slogan": "German sentiment scores with SentiWS",
 								            "description": "This package uses the [spaCy 2.0 extensions](https://spacy.io/usage/processing-pipelines#extensions) to add [SentiWS](http://wortschatz.uni-leipzig.de/en/download) as German sentiment score directly into your spaCy pipeline.",
 								            "github": "Liebeck/spacy-sentiws",
 								            "pip": "spacy-sentiws",
 								            "code_example": [
 								                "import spacy",
 								                "from spacy_sentiws import spaCySentiWS",
 								                "",
 								                "nlp = spacy.load('de')",
 								                "sentiws = spaCySentiWS(sentiws_path='data/sentiws/')",
 								                "nlp.add_pipe(sentiws)",
 								                "doc = nlp('Die Dummheit der Unterwerfung blüht in hübschen Farben.')",
 								                "",
 								                "for token in doc:",
 								                "    print('{}, {}, {}'.format(token.text, token._.sentiws, token.pos_))"
 								            ],
 								            "author": "Matthias Liebeck",
 								            "author_links": {
 								                "github": "Liebeck"
 								            },
 								            "category": ["pipeline"],
 								            "tags": ["sentiment", "german"]
 								        },
 								        {
 								            "id": "spacy-lefff",
-												Updating description and code snippet spacy-lefff (#2623)

* updating description and code snippet spacy-lefff

* contributors agreement

											
										
										
											2018-08-02 18:25:27 +03:00
+								            "slogan": "POS and French lemmatization with Lefff",
 								            "description": "spacy v2.0 extension and pipeline component for adding a French POS and lemmatizer based on [Lefff](https://hal.inria.fr/inria-00521242/).",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            "github": "sammous/spacy-lefff",
 								            "pip": "spacy-lefff",
 								            "code_example": [
 								                "import spacy",
-												Updating description and code snippet spacy-lefff (#2623)

* updating description and code snippet spacy-lefff

* contributors agreement

											
										
										
											2018-08-02 18:25:27 +03:00
+								                "from spacy_lefff import LefffLemmatizer, POSTagger",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								                "",
 								                "nlp = spacy.load('fr')",
-												Updating description and code snippet spacy-lefff (#2623)

* updating description and code snippet spacy-lefff

* contributors agreement

											
										
										
											2018-08-02 18:25:27 +03:00
+								                "pos = POSTagger()",
 								                "french_lemmatizer = LefffLemmatizer(after_melt=True)",
 								                "nlp.add_pipe(pos, name='pos', after='parser')",
 								                "nlp.add_pipe(french_lemmatizer, name='lefff', after='pos')",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								                "doc = nlp(u\"Paris est une ville très chère.\")",
 								                "for d in doc:",
-												Updating description and code snippet spacy-lefff (#2623)

* updating description and code snippet spacy-lefff

* contributors agreement

											
										
										
											2018-08-02 18:25:27 +03:00
+								                "    print(d.text, d.pos_, d._.melt_tagger, d._.lefff_lemma, d.tag_, d.lemma_)"
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            ],
 								            "author": "Sami Moustachir",
 								            "author_links": {
 								                "github": "sammous"
 								            },
 								            "category": ["pipeline"],
-												Updating description and code snippet spacy-lefff (#2623)

* updating description and code snippet spacy-lefff

* contributors agreement

											
										
										
											2018-08-02 18:25:27 +03:00
+								            "tags": ["pos", "lemmatizer", "french"]
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								        },
 								        {
 								            "id": "lemmy",
 								            "title": "Lemmy",
 								            "slogan": "A Danish lemmatizer",
 								            "description": "Lemmy is a lemmatizer for Danish 🇩🇰 . It comes already trained on Dansk Sprognævns (DSN) word list (‘fuldformliste’) and the Danish Universal Dependencies and is ready for use. Lemmy also supports training on your own dataset. The model currently included in Lemmy was evaluated on the Danish Universal Dependencies dev dataset and scored an accruacy > 99%.\n\nYou can use Lemmy as a spaCy extension, more specifcally a spaCy pipeline component. This is highly recommended and makes the lemmas easily accessible from the spaCy tokens. Lemmy makes use of POS tags to predict the lemmas. When wired up to the spaCy pipeline, Lemmy has the benefit of using spaCy’s builtin POS tagger.",
 								            "github": "sorenlind/lemmy",
 								            "pip": "lemmy",
 								            "code_example": [
 								                "import da_custom_model as da # name of your spaCy model",
 								                "import lemmy.pipe",
 								                "nlp = da.load()",
 								                "",
 								                "# create an instance of Lemmy's pipeline component for spaCy",
 								                "pipe = lemmy.pipe.load()",
 								                "",
 								                "# add the comonent to the spaCy pipeline.",
 								                "nlp.add_pipe(pipe, after='tagger')",
 								                "",
 								                "# lemmas can now be accessed using the `._.lemma` attribute on the tokens",
 								                "nlp(\"akvariernes\")[0]._.lemma"
 								            ],
 								            "thumb": "https://i.imgur.com/RJVFRWm.jpg",
 								            "author": "Søren Lind Kristiansen",
 								            "author_links": {
 								                "github": "sorenlind"
 								            },
 								            "category": ["pipeline"],
 								            "tags": ["lemmatizer", "danish"]
 								        },
 								        {
 								            "id": "wmd-relax",
 								            "slogan": "Calculates word mover's distance insanely fast",
 								            "description": "Calculates Word Mover's Distance as described in [From Word Embeddings To Document Distances](http://www.cs.cornell.edu/~kilian/papers/wmd_metric.pdf) by Matt Kusner, Yu Sun, Nicholas Kolkin and Kilian Weinberger.\n\n⚠️ **This package is currently only compatible with spaCy v.1x.**",
 								            "github": "src-d/wmd-relax",
 								            "thumb": "https://i.imgur.com/f91C3Lf.jpg",
 								            "code_example": [
 								                "import spacy",
 								                "import wmd",
 								                "",
 								                "nlp = spacy.load('en', create_pipeline=wmd.WMD.create_spacy_pipeline)",
 								                "doc1 = nlp(\"Politician speaks to the media in Illinois.\")",
 								                "doc2 = nlp(\"The president greets the press in Chicago.\")",
 								                "print(doc1.similarity(doc2))"
 								            ],
 								            "author": "source{d}",
 								            "author_links": {
 								                "github": "src-d",
 								                "twitter": "sourcedtech",
 								                "website": "https://sourced.tech"
 								            },
 								            "category": ["pipeline"]
 								        },
 								        {
 								            "id": "neuralcoref",
 								            "slogan": "State-of-the-art coreference resolution based on neural nets and spaCy",
-												update neuralcoref example (#4317)


											
										
										
											2019-09-24 11:47:17 +03:00
+								            "description": "This coreference resolution module is based on the super fast [spaCy](https://spacy.io/) parser and uses the neural net scoring model described in [Deep Reinforcement Learning for Mention-Ranking Coreference Models](http://cs.stanford.edu/people/kevclark/resources/clark-manning-emnlp2016-deep.pdf) by Kevin Clark and Christopher D. Manning, EMNLP 2016. Since ✨Neuralcoref v2.0, you can train the coreference resolution system on your own dataset — e.g., another language than English! — **provided you have an annotated dataset**. Note that to use neuralcoref with spaCy > 2.1.0, you'll have to install neuralcoref from source.",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            "github": "huggingface/neuralcoref",
 								            "thumb": "https://i.imgur.com/j6FO9O6.jpg",
 								            "code_example": [
-												update neuralcoref example (#4317)


											
										
										
											2019-09-24 11:47:17 +03:00
+								                "import spacy",
 								                "import neuralcoref",
 								                "",
 								                "nlp = spacy.load('en')",
 								                "neuralcoref.add_to_pipe(nlp)",
 								                "doc1 = nlp('My sister has a dog. She loves him.')",
 								                "print(doc1._.coref_clusters)",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								                "",
-												update neuralcoref example (#4317)


											
										
										
											2019-09-24 11:47:17 +03:00
+								                "doc2 = nlp('Angela lives in Boston. She is quite happy in that city.')",
 								                "for ent in doc2.ents:",
 								                "    print(ent._.coref_cluster)"
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            ],
 								            "author": "Hugging Face",
 								            "author_links": {
 								                "github": "huggingface"
 								            },
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								            "category": ["standalone", "conversational", "models"],
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            "tags": ["coref"]
 								        },
 								        {
 								            "id": "neuralcoref-vizualizer",
 								            "title": "Neuralcoref Visualizer",
 								            "slogan": "State-of-the-art coreference resolution based on neural nets and spaCy",
 								            "description": "In short, coreference is the fact that two or more expressions in a text – like pronouns or nouns – link to the same person or thing. It is a classical Natural language processing task, that has seen a revival of interest in the past two years as several research groups applied cutting-edge deep-learning and reinforcement-learning techniques to it. It is also one of the key building blocks to building conversational Artificial intelligences.",
 								            "url": "https://huggingface.co/coref/",
 								            "image": "https://i.imgur.com/3yy4Qyf.png",
 								            "thumb": "https://i.imgur.com/j6FO9O6.jpg",
 								            "github": "huggingface/neuralcoref",
 								            "category": ["visualizers", "conversational"],
 								            "tags": ["coref", "chatbots"],
 								            "author": "Hugging Face",
 								            "author_links": {
 								                "github": "huggingface"
 								            }
 								        },
 								        {
 								            "id": "spacy-vis",
 								            "slogan": "A visualisation tool for spaCy using Hierplane",
 								            "description": "A visualiser for spaCy annotations. This visualisation uses the [Hierplane](https://allenai.github.io/hierplane/) Library to render the dependency parse from spaCy's models. It also includes visualisation of entities and POS tags within nodes.",
 								            "github": "DeNeutoy/spacy-vis",
 								            "url": "http://spacyvis.allennlp.org/spacy-parser",
 								            "thumb": "https://i.imgur.com/DAG9QFd.jpg",
 								            "image": "https://raw.githubusercontent.com/DeNeutoy/spacy-vis/master/img/example.gif",
 								            "author": "Mark Neumann",
 								            "author_links": {
 								                "twitter": "MarkNeumannnn",
 								                "github": "DeNeutoy"
 								            },
 								            "category": ["visualizers"]
 								        },
 								        {
 								            "id": "matcher-explorer",
 								            "title": "Rule-based Matcher Explorer",
 								            "slogan": "Test spaCy's rule-based Matcher by creating token patterns interactively",
-												Fix links [ci skip]

											
										
										
											2019-02-18 00:25:50 +03:00
+								            "description": "Test spaCy's rule-based `Matcher` by creating token patterns interactively and running them over your text. Each token can set multiple attributes like text value, part-of-speech tag or boolean flags. The token-based view lets you explore how spaCy processes your text – and why your pattern matches, or why it doesn't. For more details on rule-based matching, see the [documentation](https://spacy.io/usage/rule-based-matching).",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            "image": "https://explosion.ai/assets/img/demos/matcher.png",
 								            "thumb": "https://i.imgur.com/rPK4AGt.jpg",
 								            "url": "https://explosion.ai/demos/matcher",
 								            "author": "Ines Montani",
 								            "author_links": {
 								                "twitter": "_inesmontani",
 								                "github": "ines",
 								                "website": "https://ines.io"
 								            },
 								            "category": ["visualizers"]
 								        },
 								        {
 								            "id": "displacy",
 								            "title": "displaCy",
 								            "slogan": "A modern syntactic dependency visualizer",
 								            "description": "Visualize spaCy's guess at the syntactic structure of a sentence. Arrows point from children to heads, and are labelled by their relation type.",
 								            "url": "https://explosion.ai/demos/displacy",
 								            "thumb": "https://i.imgur.com/nxDcHaL.jpg",
 								            "image": "https://explosion.ai/assets/img/demos/displacy.png",
 								            "author": "Ines Montani",
 								            "author_links": {
 								                "twitter": "_inesmontani",
 								                "github": "ines",
 								                "website": "https://ines.io"
 								            },
 								            "category": ["visualizers"]
 								        },
 								        {
 								            "id": "displacy-ent",
 								            "title": "displaCy ENT",
 								            "slogan": "A modern named entity visualizer",
 								            "description": "Visualize spaCy's guess at the named entities in the document. You can filter the displayed types, to only show the annotations you're interested in.",
 								            "url": "https://explosion.ai/demos/displacy-ent",
 								            "thumb": "https://i.imgur.com/A77Ecbs.jpg",
 								            "image": "https://explosion.ai/assets/img/demos/displacy-ent.png",
 								            "author": "Ines Montani",
 								            "author_links": {
 								                "twitter": "_inesmontani",
 								                "github": "ines",
 								                "website": "https://ines.io"
 								            },
 								            "category": ["visualizers"]
 								        },
 								        {
 								            "id": "explacy",
 								            "slogan": "A small tool that explains spaCy parse results",
 								            "github": "tylerneylon/explacy",
 								            "thumb": "https://i.imgur.com/V1hCWmn.jpg",
 								            "image": "https://raw.githubusercontent.com/tylerneylon/explacy/master/img/screenshot.png",
 								            "code_example": [
 								                "import spacy",
 								                "import explacy",
 								                "",
 								                "nlp = spacy.load('en')",
 								                "explacy.print_parse_info(nlp, 'The salad was surprisingly tasty.')"
 								            ],
 								            "author": "Tyler Neylon",
 								            "author_links": {
 								                "github": "tylerneylon"
 								            },
 								            "category": ["visualizers"]
 								        },
-												Add scattertext [ci skip]

											
										
										
											2018-05-07 20:10:23 +03:00
+								        {
 								            "id": "scattertext",
 								            "slogan": "Beautiful visualizations of how language differs among document types",
 								            "description": "A tool for finding distinguishing terms in small-to-medium-sized corpora, and presenting them in a sexy, interactive scatter plot with non-overlapping term labels. Exploratory data analysis just got more fun.",
 								            "github": "JasonKessler/scattertext",
 								            "image": "https://jasonkessler.github.io/2012conventions0.0.2.2.png",
 								            "code_example": [
 								                "import spacy",
 								                "import scattertext as st",
 								                "",
 								                "nlp = spacy.load('en')",
 								                "corpus = st.CorpusFromPandas(convention_df,",
 								                "                             category_col='party',",
 								                "                             text_col='text',",
 								                "                             nlp=nlp).build()"
 								            ],
 								            "author": "Jason Kessler",
 								            "author_links": {
 								                "github": "JasonKessler",
 								                "twitter": "jasonkessler"
 								            },
 								            "category": ["visualizers"]
 								        },
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								        {
 								            "id": "rasa",
-												Update information about Rasa (#4492)

Rasa has been updated and rasa core and rasa nlu have been merged.
											
										
										
											2019-10-22 15:32:31 +03:00
+								            "title": "Rasa",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            "slogan": "Turn natural language into structured data",
-												Update information about Rasa (#4492)

Rasa has been updated and rasa core and rasa nlu have been merged.
											
										
										
											2019-10-22 15:32:31 +03:00
+								            "description": "Machine learning tools for developers to build, improve, and deploy contextual chatbots and assistants. Powered by open source.",
 								            "github": "RasaHQ/rasa",
 								            "pip": "rasa",
 								            "thumb": "https://i.imgur.com/TyZnpwL.png",
 								            "url": "https://rasa.com/",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            "author": "Rasa",
 								            "author_links": {
 								                "github": "RasaHQ"
 								            },
 								            "category": ["conversational"],
 								            "tags": ["chatbots"]
 								        },
 								        {
-												Small doc fixes (#5250)

* fix link

* torchtext instead of tochtext
											
										
										
											2020-04-03 14:01:43 +03:00
+								            "id": "torchtext",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            "title": "torchtext",
 								            "slogan": "Data loaders and abstractions for text and NLP",
 								            "github": "pytorch/text",
 								            "pip": "torchtext",
 								            "thumb": "https://i.imgur.com/WFkxuPo.png",
 								            "code_example": [
 								                ">>> pos = data.TabularDataset(",
 								                "...    path='data/pos/pos_wsj_train.tsv', format='tsv',",
 								                "...    fields=[('text', data.Field()),",
 								                "...            ('labels', data.Field())])",
 								                "...",
 								                ">>> sentiment = data.TabularDataset(",
 								                "...    path='data/sentiment/train.json', format='json',",
 								                "...    fields={'sentence_tokenized': ('text', data.Field(sequential=True)),",
 								                "...            'sentiment_gold': ('labels', data.Field(sequential=False))})"
 								            ],
 								            "category": ["standalone", "research"],
 								            "tags": ["pytorch"]
 								        },
 								        {
 								            "id": "allennlp",
 								            "title": "AllenNLP",
 								            "slogan": "An open-source NLP research library, built on PyTorch and spaCy",
 								            "description": "AllenNLP is a new library designed to accelerate NLP research, by providing a framework that supports modern deep learning workflows for cutting-edge language understanding problems. AllenNLP uses spaCy as a preprocessing component. You can also use Allen NLP to develop spaCy pipeline components, to add annotations to the `Doc` object.",
 								            "github": "allenai/allennlp",
 								            "pip": "allennlp",
 								            "thumb": "https://i.imgur.com/U8opuDN.jpg",
 								            "url": "http://allennlp.org",
 								            "author": " Allen Institute for Artificial Intelligence",
 								            "author_links": {
 								                "github": "allenai",
 								                "twitter": "allenai_org",
 								                "website": "http://allenai.org"
 								            },
 								            "category": ["standalone", "research"]
 								        },
-												Update universe [ci skip]

											
										
										
											2019-03-12 13:13:03 +03:00
+								        {
 								            "id": "scispacy",
 								            "title": "scispaCy",
 								            "slogan": "A full spaCy pipeline and models for scientific/biomedical documents",
 								            "github": "allenai/scispacy",
 								            "pip": "scispacy",
 								            "thumb": "https://i.imgur.com/dJQSclW.png",
 								            "url": "https://allenai.github.io/scispacy/",
 								            "author": " Allen Institute for Artificial Intelligence",
 								            "author_links": {
 								                "github": "allenai",
 								                "twitter": "allenai_org",
 								                "website": "http://allenai.org"
 								            },
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								            "category": ["scientific", "models", "research"]
-												Update universe [ci skip]

											
										
										
											2019-03-12 13:13:03 +03:00
+								        },
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								        {
 								            "id": "textacy",
 								            "slogan": "NLP, before and after spaCy",
 								            "description": "`textacy` is a Python library for performing a variety of natural language processing (NLP) tasks, built on the high-performance `spacy` library. With the fundamentals – tokenization, part-of-speech tagging, dependency parsing, etc. – delegated to another library, `textacy` focuses on the tasks that come before and follow after.",
 								            "github": "chartbeat-labs/textacy",
 								            "pip": "textacy",
 								            "url": "https://chartbeat-labs.github.io/textacy/",
 								            "author": "Burton DeWilde",
 								            "author_links": {
 								                "github": "bdewilde",
 								                "twitter": "bjdewilde"
 								            },
 								            "category": ["standalone"]
 								        },
-												adds textpipe to universe (#3500) [ci skip]

* Adds textpipe to universe

* signed contributor agreement

* Adjust formatting, code style and use "standalone" category

											
										
										
											2019-03-28 17:13:19 +03:00
+								        {
 								            "id": "textpipe",
 								            "slogan": "clean and extract metadata from text",
 								            "description": "`textpipe` is a Python package for converting raw text in to clean, readable text and extracting metadata from that text. Its functionalities include transforming raw text into readable text by removing HTML tags and extracting metadata such as the number of words and named entities from the text.",
 								            "github": "textpipe/textpipe",
 								            "pip": "textpipe",
 								            "author": "Textpipe Contributors",
 								            "author_links": {
 								                "github": "textpipe",
 								                "website": "https://github.com/textpipe/textpipe/blob/master/CONTRIBUTORS.md"
 								            },
 								            "category": ["standalone"],
 								            "tags": ["text-processing", "named-entity-recognition"],
 								            "thumb": "https://avatars0.githubusercontent.com/u/40492530",
 								            "code_example": [
 								                "from textpipe import doc, pipeline",
 								                "sample_text = 'Sample text! <!DOCTYPE>'",
 								                "document = doc.Doc(sample_text)",
 								                "print(document.clean)",
 								                "'Sample text!'",
 								                "print(document.language)",
 								                "# 'en'",
 								                "print(document.nwords)",
 								                "# 2",
 								                "",
 								                "pipe = pipeline.Pipeline(['CleanText', 'NWords'])",
 								                "print(pipe(sample_text))",
 								                "# {'CleanText': 'Sample text!', 'NWords': 2}"
 								            ]
 								        },
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								        {
 								            "id": "mordecai",
 								            "slogan": "Full text geoparsing using spaCy, Geonames and Keras",
 								            "description": "Extract the place names from a piece of text, resolve them to the correct place, and return their coordinates and structured geographic information.",
 								            "github": "openeventdata/mordecai",
 								            "pip": "mordecai",
 								            "thumb": "https://i.imgur.com/gPJ9upa.jpg",
 								            "code_example": [
 								                "from mordecai import Geoparser",
 								                "geo = Geoparser()",
 								                "geo.geoparse(\"I traveled from Oxford to Ottawa.\")"
 								            ],
 								            "author": "Andy Halterman",
 								            "author_links": {
 								                "github": "ahalterman",
 								                "twitter": "ahalterman"
 								            },
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								            "category": ["standalone", "scientific"]
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								        },
 								        {
 								            "id": "kindred",
 								            "title": "Kindred",
 								            "slogan": "Biomedical relation extraction using spaCy",
 								            "description": "Kindred is a package for relation extraction in biomedical texts. Given some training data, it can build a model to identify relations between entities (e.g. drugs, genes, etc) in a sentence.",
 								            "github": "jakelever/kindred",
 								            "pip": "kindred",
 								            "code_example": [
 								                "import kindred",
 								                "",
 								                "trainCorpus = kindred.bionlpst.load('2016-BB3-event-train')",
 								                "devCorpus = kindred.bionlpst.load('2016-BB3-event-dev')",
 								                "predictionCorpus = devCorpus.clone()",
 								                "predictionCorpus.removeRelations()",
 								                "classifier = kindred.RelationClassifier()",
 								                "classifier.train(trainCorpus)",
 								                "classifier.predict(predictionCorpus)",
 								                "f1score = kindred.evaluate(devCorpus, predictionCorpus, metric='f1score')"
 								            ],
 								            "author": "Jake Lever",
 								            "author_links": {
 								                "github": "jakelever"
 								            },
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								            "category": ["standalone", "scientific"]
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								        },
 								        {
 								            "id": "sense2vec",
 								            "slogan": "Use NLP to go beyond vanilla word2vec",
 								            "description": "sense2vec ([Trask et. al](https://arxiv.org/abs/1511.06388), 2015) is a nice twist on [word2vec](https://en.wikipedia.org/wiki/Word2vec) that lets you learn more interesting, detailed and context-sensitive word vectors. For an interactive example of the technology, see our [sense2vec demo](https://explosion.ai/demos/sense2vec) that lets you explore semantic similarities across all Reddit comments of 2015.",
 								            "github": "explosion/sense2vec",
-												update sense2vec version (#4320)


											
										
										
											2019-09-25 13:17:54 +03:00
+								            "pip": "sense2vec==1.0.0a1",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            "thumb": "https://i.imgur.com/awfdhX6.jpg",
 								            "image": "https://explosion.ai/assets/img/demos/sense2vec.png",
 								            "url": "https://explosion.ai/demos/sense2vec",
 								            "code_example": [
 								                "import spacy",
 								                "from sense2vec import Sense2VecComponent",
 								                "",
 								                "nlp = spacy.load('en')",
 								                "s2v = Sense2VecComponent('/path/to/reddit_vectors-1.1.0')",
 								                "nlp.add_pipe(s2v)",
 								                "",
-												Remove u-strings and fix formatting [ci skip]

											
										
										
											2019-09-12 17:11:15 +03:00
+								                "doc = nlp(\"A sentence about natural language processing.\")",
 								                "assert doc[3].text == 'natural language processing'",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								                "freq = doc[3]._.s2v_freq",
 								                "vector = doc[3]._.s2v_vec",
 								                "most_similar = doc[3]._.s2v_most_similar(3)",
 								                "# [(('natural language processing', 'NOUN'), 1.0),",
 								                "#  (('machine learning', 'NOUN'), 0.8986966609954834),",
 								                "#  (('computer vision', 'NOUN'), 0.8636297583580017)]"
 								            ],
 								            "category": ["pipeline", "standalone", "visualizers"],
 								            "tags": ["vectors"],
-												Update universe.json [ci skip]

											
										
										
											2019-08-05 15:30:07 +03:00
+								            "author": "Explosion",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            "author_links": {
 								                "twitter": "explosion_ai",
 								                "github": "explosion",
 								                "website": "https://explosion.ai"
 								            }
 								        },
 								        {
 								            "id": "spacyr",
 								            "slogan": "An R wrapper for spaCy",
 								            "github": "quanteda/spacyr",
 								            "cran": "spacyr",
 								            "code_example": [
 								                "library(\"spacyr\")",
 								                "spacy_initialize()",
 								                "",
 								                "txt <- c(d1 = \"spaCy excels at large-scale information extraction tasks.\",",
 								                "         d2 = \"Mr. Smith goes to North Carolina.\")",
 								                "",
 								                "# process documents and obtain a data.table",
 								                "parsedtxt <- spacy_parse(txt)"
 								            ],
 								            "code_language": "r",
 								            "author": "Kenneth Benoit & Aki Matsuo",
 								            "category": ["nonpython"]
 								        },
 								        {
 								            "id": "cleannlp",
 								            "title": "CleanNLP",
 								            "slogan": "A tidy data model for NLP in R",
 								            "description": "The cleanNLP package is designed to make it as painless as possible to turn raw text into feature-rich data frames. The package offers four backends that can be used for parsing text: `tokenizers`, `udpipe`, `spacy` and `corenlp`.",
 								            "github": "statsmaths/cleanNLP",
 								            "cran": "cleanNLP",
 								            "author": "Taylor B. Arnold",
 								            "author_links": {
 								                "github": "statsmaths"
 								            },
 								            "category": ["nonpython"]
 								        },
 								        {
 								            "id": "spacy-cpp",
 								            "slogan": "C++ wrapper library for spaCy",
 								            "description": "The goal of spacy-cpp is to expose the functionality of spaCy to C++ applications, and to provide an API that is similar to that of spaCy, enabling rapid development in Python and simple porting to C++.",
 								            "github": "d99kris/spacy-cpp",
 								            "code_example": [
 								                "Spacy::Spacy spacy;",
 								                "auto nlp = spacy.load(\"en_core_web_sm\");",
 								                "auto doc = nlp.parse(\"This is a sentence.\");",
 								                "for (auto& token : doc.tokens())",
 								                "    std::cout << token.text() << \" [\" << token.pos_() << \"]\\n\";"
 								            ],
 								            "code_language": "cpp",
 								            "author": "Kristofer Berggren",
 								            "author_links": {
 								                "github": "d99kris"
 								            },
 								            "category": ["nonpython"]
 								        },
 								        {
 								            "id": "spaCy.jl",
 								            "slogan": "Julia interface for spaCy (work in progress)",
 								            "github": "jekbradbury/SpaCy.jl",
 								            "author": "James Bradbury",
 								            "author_links": {
 								                "github": "jekbradbury",
 								                "twitter": "jekbradbury"
 								            },
 								            "category": ["nonpython"]
 								        },
 								        {
 								            "id": "spacy_api",
 								            "slogan": "Server/client to load models in a separate, dedicated process",
 								            "github": "kootenpv/spacy_api",
 								            "pip": "spacy_api",
 								            "code_example": [
 								                "from spacy_api import Client",
 								                "",
 								                "spacy_client = Client() # default args host/port",
 								                "doc = spacy_client.single(\"How are you\")"
 								            ],
 								            "author": "Pascal van Kooten",
 								            "author_links": {
 								                "github": "kootenpv"
 								            },
 								            "category": ["apis"]
 								        },
 								        {
 								            "id": "spacy-api-docker",
 								            "slogan": "spaCy REST API, wrapped in a Docker container",
 								            "github": "jgontrum/spacy-api-docker",
 								            "url": "https://hub.docker.com/r/jgontrum/spacyapi/",
 								            "thumb": "https://i.imgur.com/NRnDKyj.jpg",
 								            "code_example": [
 								                "version: '2'",
 								                "",
 								                "services:",
 								                "  spacyapi:",
 								                "    image: jgontrum/spacyapi:en_v2",
 								                "    ports:",
 								                "      - \"127.0.0.1:8080:80\"",
 								                "    restart: always"
 								            ],
 								            "code_language": "docker",
 								            "author": "Johannes Gontrum",
 								            "author_links": {
 								                "github": "jgontrum"
 								            },
 								            "category": ["apis"]
 								        },
 								        {
 								            "id": "languagecrunch",
 								            "slogan": "NLP server for spaCy, WordNet and NeuralCoref as a Docker image",
 								            "github": "artpar/languagecrunch",
 								            "code_example": [
 								                "docker run -it -p 8080:8080 artpar/languagecrunch",
 								                "curl http://localhost:8080/nlp/parse?`echo -n \"The new twitter is so weird. Seriously. Why is there a new twitter? What was wrong with the old one? Fix it now.\" | python -c \"import urllib, sys; print(urllib.urlencode({'sentence': sys.stdin.read()}))\"`"
 								            ],
 								            "code_language": "bash",
 								            "author": "Parth Mudgal",
 								            "author_links": {
 								                "github": "artpar"
 								            },
 								            "category": ["apis"]
 								        },
 								        {
 								            "id": "spacy-nlp",
 								            "slogan": "Expose spaCy NLP text parsing to Node.js (and other languages) via Socket.IO",
 								            "github": "kengz/spacy-nlp",
 								            "thumb": "https://i.imgur.com/w41VSr7.jpg",
 								            "code_example": [
 								                "const spacyNLP = require(\"spacy-nlp\")",
 								                "// default port 6466",
 								                "// start the server with the python client that exposes spacyIO (or use an existing socketIO server at IOPORT)",
 								                "var serverPromise = spacyNLP.server({ port: process.env.IOPORT });",
 								                "// Loading spacy may take up to 15s"
 								            ],
 								            "code_language": "javascript",
 								            "author": "Wah Loon Keng",
 								            "author_links": {
 								                "github": "kengz"
 								            },
 								            "category": ["apis", "nonpython"]
 								        },
 								        {
 								            "id": "prodigy",
 								            "title": "Prodigy",
 								            "slogan": "Radically efficient machine teaching, powered by active learning",
 								            "description": "Prodigy is an annotation tool so efficient that data scientists can do the annotation themselves, enabling a new level of rapid iteration. Whether you're working on entity recognition, intent detection or image classification, Prodigy can help you train and evaluate your models faster. Stream in your own examples or real-world data from live APIs, update your model in real-time and chain models together to build more complex systems.",
 								            "thumb": "https://i.imgur.com/UVRtP6g.jpg",
 								            "image": "https://i.imgur.com/Dt5vrY6.png",
 								            "url": "https://prodi.gy",
 								            "code_example": [
 								                "prodigy dataset ner_product \"Improve PRODUCT on Reddit data\"",
 								                "✨ Created dataset 'ner_product'.",
 								                "",
 								                "prodigy ner.teach ner_product en_core_web_sm ~/data.jsonl --label PRODUCT",
 								                "✨ Starting the web server on port 8080..."
 								            ],
 								            "code_language": "bash",
-												Add ExcelCy into Universe list (#2572)

Hi guys,

This is my first spaCy extension. I am excited to be able to do this. Please do let me know if there are any suggestions or modifications I need to make. Feel free to use/contribute to the repo that I made.

## Description
ExcelCy is a spaCy toolkit to help improve the data training experience. It provides easy annotation using the Excel file format. It has a helper to pre-train entity annotation with phrase and regex matcher pipes.

### Types of change
Update to Universe list in website.

## Checklist
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2018-07-19 20:28:33 +03:00
+								            "category": ["standalone", "training"],
-												Update universe.json [ci skip]

											
										
										
											2019-08-05 15:30:07 +03:00
+								            "author": "Explosion",
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								            "author_links": {
 								                "twitter": "explosion_ai",
 								                "github": "explosion",
 								                "website": "https://explosion.ai"
 								            }
 								        },
 								        {
 								            "id": "dragonfire",
 								            "title": "Dragonfire",
 								            "slogan": "An open-source virtual assistant for Ubuntu based Linux distributions",
 								            "github": "DragonComputer/Dragonfire",
 								            "thumb": "https://i.imgur.com/5fqguKS.jpg",
 								            "image": "https://raw.githubusercontent.com/DragonComputer/Dragonfire/master/docs/img/demo.gif",
 								            "author": "Dragon Computer",
 								            "author_links": {
 								                "github": "DragonComputer",
 								                "website": "http://dragon.computer"
 								            },
 								            "category": ["standalone"]
 								        },
-												Update universe [ci skip]

											
										
										
											2019-06-02 14:55:55 +03:00
+								        {
 								            "id": "prefect",
 								            "title": "Prefect",
 								            "slogan": "Workflow management system designed for modern infrastructure",
 								            "github": "PrefectHQ/prefect",
 								            "pip": "prefect",
 								            "thumb": "https://i.imgur.com/oLTwr0e.png",
 								            "code_example": [
 								                "from prefect import Flow",
 								                "from prefect.tasks.spacy.spacy_tasks import SpacyNLP",
 								                "import spacy",
 								                "",
 								                "nlp = spacy.load(\"en_core_web_sm\")",
 								                "",
 								                "with Flow(\"Natural Language Processing\") as flow:",
 								                "    doc = SpacyNLP(text=\"This is some text\", nlp=nlp)",
 								                "",
 								                "flow.run()"
 								            ],
 								            "author": "Prefect",
 								            "author_links": {
 								                "website": "https://prefect.io"
 								            },
 								            "category": ["standalone"]
 								        },
 								        {
 								            "id": "graphbrain",
 								            "title": "Graphbrain",
 								            "slogan": "Automated meaning extraction and text understanding",
 								            "description": "Graphbrain is an Artificial Intelligence open-source software library and scientific research tool. Its aim is to facilitate automated meaning extraction and text understanding, as well as the exploration and inference of knowledge.",
 								            "github": "graphbrain/graphbrain",
 								            "pip": "graphbrain",
 								            "thumb": "https://i.imgur.com/cct9W1E.png",
 								            "author": "Graphbrain",
 								            "category": ["standalone"]
 								        },
-												Update universe.json (#5022)

e-book is available from https://nostarch.com/NLPPython
											
										
										
											2020-02-15 17:44:55 +03:00
+								        {
 								            "type": "education",
 								            "id": "nostarch-nlp-python",
 								            "title": "Natural Language Processing Using Python",
 								            "slogan": "No Starch Press, 2020",
 								            "description": "Natural Language Processing Using Python is an introduction to natural language processing (NLP), the task of converting human language into data that a computer can process. The book uses spaCy, a leading Python library for NLP, to guide readers through common NLP tasks related to generating and understanding human language with code. It addresses problems like understanding a user's intent, continuing a conversation with a human, and maintaining the state of a conversation.",
-												Auto-format and fix image [ci skip]

											
										
										
											2020-02-23 15:56:50 +03:00
+								            "cover": "https://i.imgur.com/w0iycjl.jpg",
-												Update universe.json (#5022)

e-book is available from https://nostarch.com/NLPPython
											
										
										
											2020-02-15 17:44:55 +03:00
+								            "url": "https://nostarch.com/NLPPython",
 								            "author": "Yuli Vasiliev",
 								            "category": ["books"]
-												Auto-format and fix image [ci skip]

											
										
										
											2020-02-23 15:56:50 +03:00
+								        },
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								        {
 								            "type": "education",
 								            "id": "oreilly-python-ds",
 								            "title": "Introduction to Machine Learning with Python: A Guide for Data Scientists",
 								            "slogan": "O'Reilly, 2016",
 								            "description": "Machine learning has become an integral part of many commercial applications and research projects, but this field is not exclusive to large companies with extensive research teams. If you use Python, even as a beginner, this book will teach you practical ways to build your own machine learning solutions. With all the data available today, machine learning applications are limited only by your imagination.",
 								            "cover": "https://covers.oreillystatic.com/images/0636920030515/lrg.jpg",
 								            "url": "http://shop.oreilly.com/product/0636920030515.do",
 								            "author": "Andreas Müller, Sarah Guido",
 								            "category": ["books"]
 								        },
 								        {
 								            "type": "education",
 								            "id": "text-analytics-python",
 								            "title": "Text Analytics with Python",
 								            "slogan": "Apress / Springer, 2016",
 								            "description": "*Text Analytics with Python* teaches you the techniques related to natural language processing and text analytics, and you will gain the skills to know which technique is best suited to solve a particular problem. You will look at each technique and algorithm with both a bird's eye view to understand how it can be used as well as with a microscopic view to understand the mathematical concepts and to implement them to solve your own problems.",
 								            "github": "dipanjanS/text-analytics-with-python",
 								            "cover": "https://i.imgur.com/AOmzZu8.png",
 								            "url": "https://www.amazon.com/Text-Analytics-Python-Real-World-Actionable/dp/148422387X",
 								            "author": "Dipanjan Sarkar",
 								            "category": ["books"]
 								        },
 								        {
 								            "type": "education",
 								            "id": "practical-ml-python",
 								            "title": "Practical Machine Learning with Python",
 								            "slogan": "Apress, 2017",
 								            "description": "Master the essential skills needed to recognize and solve complex problems with machine learning and deep learning. Using real-world examples that leverage the popular Python machine learning ecosystem, this book is your perfect companion for learning the art and science of machine learning to become a successful practitioner. The concepts, techniques, tools, frameworks, and methodologies used in this book will teach you how to think, design, build, and execute machine learning systems and projects successfully.",
 								            "github": "dipanjanS/practical-machine-learning-with-python",
 								            "cover": "https://i.imgur.com/5F4mkt7.jpg",
 								            "url": "https://www.amazon.com/Practical-Machine-Learning-Python-Problem-Solvers/dp/1484232062",
 								            "author": "Dipanjan Sarkar, Raghav Bali, Tushar Sharma",
 								            "category": ["books"]
 								        },
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								        {
 								            "type": "education",
 								            "id": "packt-nlp-computational-linguistics",
 								            "title": "Natural Language Processing and Computational Linguistics",
 								            "slogan": "Packt, 2018",
 								            "description": "This book shows you how to use natural language processing, and computational linguistics algorithms, to make inferences and gain insights about data you have. These algorithms are based on statistical machine learning and artificial intelligence techniques. The tools to work with these algorithms are available to you right now - with Python, and tools like Gensim and spaCy.",
 								            "cover": "https://i.imgur.com/aleMf1Y.jpg",
 								            "url": "https://www.amazon.com/Natural-Language-Processing-Computational-Linguistics-ebook/dp/B07BWH779J",
 								            "author": "Bhargav Srinivasa-Desikan",
 								            "category": ["books"]
 								        },
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make executable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								        {
 								            "type": "education",
 								            "id": "learning-path-spacy",
 								            "title": "Learning Path: Mastering spaCy for Natural Language Processing",
 								            "slogan": "O'Reilly, 2017",
 								            "description": "spaCy, a fast, user-friendly library for teaching computers to understand text, simplifies NLP techniques, such as speech tagging and syntactic dependencies, so you can easily extract information, attributes, and objects from massive amounts of text to then document, measure, and analyze. This Learning Path is a hands-on introduction to using spaCy to discover insights through natural language processing. While end-to-end natural language processing solutions can be complex, you’ll learn the linguistics, algorithms, and machine learning skills to get the job done.",
 								            "url": "https://www.safaribooksonline.com/library/view/learning-path-mastering/9781491986653/",
 								            "thumb": "https://i.imgur.com/9MIgMAc.jpg",
 								            "author": "Aaron Kramer",
 								            "category": ["courses"]
-												Added Adam project to spaCy Universe (#2275)

* Added 5hirish to contributors

* Added Adam Qas Project to spaCy Universe

* Remove $ from code example

											
										
										
											2018-04-30 23:25:01 +03:00
+								        },
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								        {
 								            "type": "education",
 								            "id": "spacy-course",
 								            "title": "Advanced NLP with spaCy",
-												Update universe and display of videos [ci skip]

											
										
										
											2020-05-21 22:54:23 +03:00
+								            "slogan": "A free online course",
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								            "description": "In this free interactive course, you'll learn how to use spaCy to build advanced natural language understanding systems, using both rule-based and machine learning approaches.",
 								            "url": "https://course.spacy.io",
 								            "image": "https://i.imgur.com/JC00pHW.jpg",
 								            "thumb": "https://i.imgur.com/5RXLtrr.jpg",
 								            "author": "Ines Montani",
 								            "author_links": {
 								                "twitter": "_inesmontani",
 								                "github": "ines",
 								                "website": "https://ines.io"
 								            },
 								            "category": ["courses"]
 								        },
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								        {
 								            "type": "education",
 								            "id": "video-spacys-ner-model",
 								            "title": "spaCy's NER model",
 								            "slogan": "Incremental parsing with bloom embeddings and residual CNNs",
 								            "description": "spaCy v2.0's Named Entity Recognition system features a sophisticated word embedding strategy using subword features and \"Bloom\" embeddings, a deep convolutional neural network with residual connections, and a novel transition-based approach to named entity parsing. The system is designed to give a good balance of efficiency, accuracy and adaptability. In this talk, I sketch out the components of the system, explaining the intuition behind the various choices. I also give a brief introduction to the named entity recognition problem, with an overview of what else Explosion AI is working on, and why.",
 								            "youtube": "sqDHBH9IjRU",
 								            "author": "Matthew Honnibal",
 								            "author_links": {
 								                "twitter": "honnibal",
 								                "github": "honnibal",
 								                "website": "https://explosion.ai"
 								            },
 								            "category": ["videos"]
 								        },
 								        {
 								            "type": "education",
 								            "id": "video-new-nlp-solutions",
 								            "title": "Building new NLP solutions with spaCy and Prodigy",
 								            "slogan": "PyData Berlin 2018",
 								            "description": "In this talk, I will discuss how to address some of the most likely causes of failure for new Natural Language Processing (NLP) projects. My main recommendation is to take an iterative approach: don't assume you know what your pipeline should look like, let alone your annotation schemes or model architectures.",
 								            "author": "Matthew Honnibal",
 								            "author_links": {
 								                "twitter": "honnibal",
 								                "github": "honnibal",
 								                "website": "https://explosion.ai"
 								            },
 								            "youtube": "jpWqz85F_4Y",
 								            "category": ["videos"]
 								        },
 								        {
 								            "type": "education",
 								            "id": "video-modern-nlp-in-python",
 								            "title": "Modern NLP in Python",
 								            "slogan": "PyData DC 2016",
 								            "description": "Academic and industry research in Natural Language Processing (NLP) has progressed at an accelerating pace over the last several years. Members of the Python community have been hard at work moving cutting-edge research out of papers and into open source, \"batteries included\" software libraries that can be applied to practical problems. We'll explore some of these tools for modern NLP in Python.",
 								            "author": "Patrick Harrison",
 								            "youtube": "6zm9NC9uRkk",
 								            "category": ["videos"]
 								        },
-												Update universe and display of videos [ci skip]

											
										
										
											2020-05-21 22:54:23 +03:00
+								        {
 								            "type": "education",
 								            "id": "video-spacy-course",
 								            "title": "Advanced NLP with spaCy · A free online course",
 								            "description": "spaCy is a modern Python library for industrial-strength Natural Language Processing. In this free and interactive online course, you'll learn how to use spaCy to build advanced natural language understanding systems, using both rule-based and machine learning approaches.",
 								            "url": "https://course.spacy.io/en",
 								            "author": "Ines Montani",
 								            "author_links": {
 								                "twitter": "_inesmontani",
 								                "github": "ines"
 								            },
 								            "youtube": "THduWAnG97k",
 								            "category": ["videos"]
 								        },
 								        {
 								            "type": "education",
 								            "id": "video-spacy-course-de",
 								            "title": "Modernes NLP mit spaCy · Ein Gratis-Onlinekurs",
 								            "description": "spaCy ist eine moderne Python-Bibliothek für industriestarkes Natural Language Processing. In diesem kostenlosen und interaktiven Onlinekurs lernst du, mithilfe von spaCy fortgeschrittene Systeme für die Analyse natürlicher Sprache zu entwickeln und dabei sowohl regelbasierte Verfahren, als auch moderne Machine-Learning-Technologie einzusetzen.",
 								            "url": "https://course.spacy.io/de",
 								            "author": "Ines Montani",
 								            "author_links": {
 								                "twitter": "_inesmontani",
 								                "github": "ines"
 								            },
 								            "youtube": "K1elwpgDdls",
 								            "category": ["videos"]
 								        },
-												Fix and update universe.json [ci skip]

											
										
										
											2020-07-07 22:12:28 +03:00
+								        {
 								            "type": "education",
 								            "id": "video-spacy-course-es",
 								            "title": "NLP avanzado con spaCy · Un curso en línea gratis",
 								            "description": "spaCy es un paquete moderno de Python para hacer Procesamiento de Lenguaje Natural de potencia industrial. En este curso en línea, interactivo y gratuito, aprenderás a usar spaCy para construir sistemas avanzados de comprensión de lenguaje natural usando enfoques basados en reglas y en machine learning.",
 								            "url": "https://course.spacy.io/es",
 								            "author": "Camila Gutiérrez",
 								            "author_links": {
 								                "twitter": "Mariacamilagl30"
 								            },
 								            "youtube": "RNiLVCE5d4k",
 								            "category": ["videos"]
 								        },
-												Update universe with videos [ci skip]

											
										
										
											2019-08-21 22:35:37 +03:00
+								        {
 								            "type": "education",
 								            "id": "video-intro-to-nlp-episode-1",
-												Update universe and display of videos [ci skip]

											
										
										
											2020-05-21 22:54:23 +03:00
+								            "title": "Intro to NLP with spaCy (1)",
-												Update universe with videos [ci skip]

											
										
										
											2019-08-21 22:35:37 +03:00
+								            "slogan": "Episode 1: Data exploration",
 								            "description": "In this new video series, data science instructor Vincent Warmerdam gets started with spaCy, an open-source library for Natural Language Processing in Python. His mission: building a system to automatically detect programming languages in large volumes of text. Follow his process from the first idea to a prototype all the way to data collection and training a statistical named entity recogntion model from scratch.",
 								            "author": "Vincent Warmerdam",
 								            "author_links": {
 								                "twitter": "fishnets88",
 								                "github": "koaning"
 								            },
 								            "youtube": "WnGPv6HnBok",
 								            "category": ["videos"]
 								        },
-												Update universe.json [ci skip]

											
										
										
											2019-09-30 14:49:44 +03:00
+								        {
 								            "type": "education",
 								            "id": "video-intro-to-nlp-episode-2",
-												Update universe and display of videos [ci skip]

											
										
										
											2020-05-21 22:54:23 +03:00
+								            "title": "Intro to NLP with spaCy (2)",
-												Update universe.json [ci skip]

											
										
										
											2019-09-30 14:49:44 +03:00
+								            "slogan": "Episode 2: Rule-based Matching",
 								            "description": "In this new video series, data science instructor Vincent Warmerdam gets started with spaCy, an open-source library for Natural Language Processing in Python. His mission: building a system to automatically detect programming languages in large volumes of text. Follow his process from the first idea to a prototype all the way to data collection and training a statistical named entity recogntion model from scratch.",
 								            "author": "Vincent Warmerdam",
 								            "author_links": {
 								                "twitter": "fishnets88",
 								                "github": "koaning"
 								            },
 								            "youtube": "KL4-Mpgbahw",
 								            "category": ["videos"]
 								        },
-												Update universe and display of videos [ci skip]

											
										
										
											2020-05-21 22:54:23 +03:00
+								        {
 								            "type": "education",
 								            "id": "video-intro-to-nlp-episode-3",
 								            "title": "Intro to NLP with spaCy (3)",
 								            "slogan": "Episode 2: Evaluation",
 								            "description": "In this new video series, data science instructor Vincent Warmerdam gets started with spaCy, an open-source library for Natural Language Processing in Python. His mission: building a system to automatically detect programming languages in large volumes of text. Follow his process from the first idea to a prototype all the way to data collection and training a statistical named entity recogntion model from scratch.",
 								            "author": "Vincent Warmerdam",
 								            "author_links": {
 								                "twitter": "fishnets88",
 								                "github": "koaning"
 								            },
 								            "youtube": "4V0JDdohxAk",
 								            "category": ["videos"]
 								        },
 								        {
 								            "type": "education",
 								            "id": "video-intro-to-nlp-episode-4",
 								            "title": "Intro to NLP with spaCy (4)",
 								            "slogan": "Episode 4: Named Entity Recognition",
 								            "description": "In this new video series, data science instructor Vincent Warmerdam gets started with spaCy, an open-source library for Natural Language Processing in Python. His mission: building a system to automatically detect programming languages in large volumes of text. Follow his process from the first idea to a prototype all the way to data collection and training a statistical named entity recogntion model from scratch.",
 								            "author": "Vincent Warmerdam",
 								            "author_links": {
 								                "twitter": "fishnets88",
 								                "github": "koaning"
 								            },
 								            "youtube": "IqOJU1-_Fi0",
 								            "category": ["videos"]
 								        },
-												Fix and update universe.json [ci skip]

											
										
										
											2020-07-07 22:12:28 +03:00
+								        {
 								            "type": "education",
 								            "id": "video-intro-to-nlp-episode-5",
 								            "title": "Intro to NLP with spaCy (5)",
 								            "slogan": "Episode 5: Rules vs. Machine Learning",
 								            "description": "In this new video series, data science instructor Vincent Warmerdam gets started with spaCy, an open-source library for Natural Language Processing in Python. His mission: building a system to automatically detect programming languages in large volumes of text. Follow his process from the first idea to a prototype all the way to data collection and training a statistical named entity recogntion model from scratch.",
 								            "author": "Vincent Warmerdam",
 								            "author_links": {
 								                "twitter": "fishnets88",
 								                "github": "koaning"
 								            },
 								            "youtube": "f4sqeLRzkPg",
 								            "category": ["videos"]
 								        },
-												Update universe with videos [ci skip]

											
										
										
											2019-08-21 22:35:37 +03:00
+								        {
 								            "type": "education",
 								            "id": "video-spacy-irl-entity-linking",
 								            "title": "Entity Linking functionality in spaCy",
 								            "slogan": "spaCy IRL 2019",
 								            "url": "https://www.youtube.com/playlist?list=PLBmcuObd5An4UC6jvK_-eSl6jCvP1gwXc",
 								            "author": "Sofie Van Landeghem",
 								            "author_links": {
 								                "twitter": "OxyKodit",
 								                "github": "svlandeg"
 								            },
 								            "youtube": "PW3RJM8tDGo",
 								            "category": ["videos"]
 								        },
 								        {
 								            "type": "education",
 								            "id": "video-spacy-irl-lemmatization",
 								            "title": "Rethinking rule-based lemmatization",
 								            "slogan": "spaCy IRL 2019",
 								            "url": "https://www.youtube.com/playlist?list=PLBmcuObd5An4UC6jvK_-eSl6jCvP1gwXc",
 								            "author": "Guadalupe Romero",
 								            "author_links": {
 								                "twitter": "_guadiromero",
 								                "github": "guadi1994"
 								            },
 								            "youtube": "88zcQODyuko",
 								            "category": ["videos"]
 								        },
 								        {
 								            "type": "education",
 								            "id": "video-spacy-irl-scispacy",
 								            "title": "ScispaCy: A spaCy pipeline & models for scientific & biomedical text",
 								            "slogan": "spaCy IRL 2019",
 								            "url": "https://www.youtube.com/playlist?list=PLBmcuObd5An4UC6jvK_-eSl6jCvP1gwXc",
 								            "author": "Mark Neumann",
 								            "author_links": {
 								                "twitter": "MarkNeumannnn",
 								                "github": "DeNeutoy"
 								            },
 								            "youtube": "2_HSKDALwuw",
 								            "category": ["videos"]
 								        },
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								        {
 								            "type": "education",
 								            "id": "podcast-nlp-highlights",
-												Update universe [ci skip]

											
										
										
											2019-06-04 12:15:51 +03:00
+								            "title": "NLP Highlights #78: Where do corpora come from?",
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								            "slogan": "January 2019",
 								            "description": "Most NLP projects rely crucially on the quality of annotations used for training and evaluating models. In this episode, Matt and Ines of Explosion AI tell us how Prodigy can improve data annotation and model development workflows. Prodigy is an annotation tool implemented as a python library, and it comes with a web application and a command line interface. A developer can define input data streams and design simple annotation interfaces. Prodigy can help break down complex annotation decisions into a series of binary decisions, and it provides easy integration with spaCy models. Developers can specify how models should be modified as new annotations come in in an active learning framework.",
 								            "soundcloud": "559200912",
 								            "thumb": "https://i.imgur.com/hOBQEzc.jpg",
-												Update universe [ci skip]

											
										
										
											2019-03-12 13:13:03 +03:00
+								            "url": "https://soundcloud.com/nlp-highlights/78-where-do-corpora-come-from-with-matt-honnibal-and-ines-montani",
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								            "author": "Matt Gardner, Waleed Ammar (Allen AI)",
 								            "author_links": {
 								                "website": "https://soundcloud.com/nlp-highlights"
 								            },
 								            "category": ["podcasts"]
 								        },
 								        {
 								            "type": "education",
 								            "id": "podcast-init",
-												Update universe [ci skip]

											
										
										
											2019-06-04 12:15:51 +03:00
+								            "title": "Podcast.__init__ #87: spaCy with Matthew Honnibal",
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								            "slogan": "December 2017",
 								            "description": "As the amount of text available on the internet and in businesses continues to increase, the need for fast and accurate language analysis becomes more prominent. This week Matthew Honnibal, the creator of SpaCy, talks about his experiences researching natural language processing and creating a library to make his findings accessible to industry.",
 								            "iframe": "https://www.pythonpodcast.com/wp-content/plugins/podlove-podcasting-plugin-for-wordpress/lib/modules/podlove_web_player/player_v4/dist/share.html?episode=https://www.pythonpodcast.com/?podlove_player4=176",
 								            "iframe_height": 200,
 								            "thumb": "https://i.imgur.com/rpo6BuY.png",
-												Update universe [ci skip]

											
										
										
											2019-03-12 13:13:03 +03:00
+								            "url": "https://www.podcastinit.com/episode-87-spacy-with-matthew-honnibal/",
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								            "author": "Tobias Macey",
 								            "author_links": {
 								                "website": "https://www.podcastinit.com"
 								            },
 								            "category": ["podcasts"]
 								        },
-												Update universe and display of videos [ci skip]

											
										
										
											2020-05-21 22:54:23 +03:00
+								        {
 								            "type": "education",
 								            "id": "podcast-init2",
 								            "title": "Podcast.__init__ #256: An Open Source Toolchain For NLP From Explosion AI",
 								            "slogan": "March 2020",
 								            "description": "The state of the art in natural language processing is a constantly moving target. With the rise of deep learning, previously cutting edge techniques have given way to robust language models. Through it all the team at Explosion AI have built a strong presence with the trifecta of SpaCy, Thinc, and Prodigy to support fast and flexible data labeling to feed deep learning models and performant and scalable text processing. In this episode founder and open source author Matthew Honnibal shares his experience growing a business around cutting edge open source libraries for the machine learning developent process.",
 								            "iframe": "https://cdn.podlove.org/web-player/share.html?episode=https%3A%2F%2Fwww.pythonpodcast.com%2F%3Fpodlove_player4%3D614",
 								            "iframe_height": 200,
 								            "thumb": "https://i.imgur.com/rpo6BuY.png",
 								            "url": "https://www.pythonpodcast.com/explosion-ai-natural-language-processing-episode-256/",
 								            "author": "Tobias Macey",
 								            "author_links": {
 								                "website": "https://www.podcastinit.com"
 								            },
 								            "category": ["podcasts"]
 								        },
-												Update universe [ci skip]

											
										
										
											2019-03-12 13:13:03 +03:00
+								        {
 								            "type": "education",
 								            "id": "talk-python-podcast",
-												Update universe [ci skip]

											
										
										
											2019-06-04 12:15:51 +03:00
+								            "title": "Talk Python #202: Building a software business",
-												Update universe [ci skip]

											
										
										
											2019-03-12 13:13:03 +03:00
+								            "slogan": "March 2019",
 								            "description": "One core question around open source is how do you fund it? Well, there is always that PayPal donate button. But that's been a tremendous failure for many projects. Often the go-to answer is consulting. But what if you don't want to trade time for money? You could take things up a notch and change the equation, exchanging value for money. That's what Ines Montani and her co-founder did when they started Explosion AI with spaCy as the foundation.",
 								            "thumb": "https://i.imgur.com/q1twuK8.png",
 								            "url": "https://talkpython.fm/episodes/show/202/building-a-software-business",
 								            "soundcloud": "588364857",
 								            "author": "Michael Kennedy",
 								            "author_links": {
 								                "website": "https://talkpython.fm/"
 								            },
 								            "category": ["podcasts"]
 								        },
-												Add TWiML podcast to universe [ci skip]

											
										
										
											2019-05-11 18:48:22 +03:00
+								        {
 								            "type": "education",
 								            "id": "twimlai-podcast",
 								            "title": "TWiML & AI: Practical NLP with spaCy and Prodigy",
 								            "slogan": "May 2019",
 								            "description": "\"Ines and I caught up to discuss her various projects, including the aforementioned SpaCy, an open-source NLP library built with a focus on industry and production use cases. In our conversation, Ines gives us an overview of the SpaCy Library, a look at some of the use cases that excite her, and the Spacy community and contributors. We also discuss her work with Prodigy, an annotation service tool that uses continuous active learning to train models, and finally, what other exciting projects she is working on.\"",
 								            "thumb": "https://i.imgur.com/ng2F5gK.png",
 								            "url": "https://twimlai.com/twiml-talk-262-practical-natural-language-processing-with-spacy-and-prodigy-w-ines-montani",
 								            "iframe": "https://html5-player.libsyn.com/embed/episode/id/9691514/height/90/theme/custom/thumbnail/no/preload/no/direction/backward/render-playlist/no/custom-color/3e85b1/",
 								            "iframe_height": 90,
 								            "author": "Sam Charrington",
 								            "author_links": {
 								                "website": "https://twimlai.com"
 								            },
 								            "category": ["podcasts"]
-												Update universe [ci skip]

											
										
										
											2019-06-03 13:19:13 +03:00
+								        },
 								        {
 								            "type": "education",
 								            "id": "analytics-vidhya",
 								            "title": "DataHack Radio #23: The Brains behind spaCy",
 								            "slogan": "June 2019",
 								            "description": "\"What would you do if you had the chance to pick the brains behind one of the most popular Natural Language Processing (NLP) libraries of our era? A library that has helped usher in the current boom in NLP applications and nurtured tons of NLP scientists? Well – you invite the creators on our popular DataHack Radio podcast and let them do the talking! We are delighted to welcome Ines Montani and Matt Honnibal, the developers of spaCy – a powerful and advanced library for NLP.\"",
 								            "thumb": "https://i.imgur.com/3zJKZ1P.jpg",
 								            "url": "https://www.analyticsvidhya.com/blog/2019/06/datahack-radio-ines-montani-matthew-honnibal-brains-behind-spacy/",
 								            "soundcloud": "630741825",
 								            "author": "Analytics Vidhya",
 								            "author_links": {
 								                "website": "https://www.analyticsvidhya.com",
 								                "twitter": "analyticsvidhya"
 								            },
 								            "category": ["podcasts"]
-												Add TWiML podcast to universe [ci skip]

											
										
										
											2019-05-11 18:48:22 +03:00
+								        },
-												Update universe [ci skip]

											
										
										
											2019-12-13 17:57:39 +03:00
+								        {
 								            "type": "education",
 								            "id": "practical-ai-podcast",
 								            "title": "Practical AI: Modern NLP with spaCy",
 								            "slogan": "December 2019",
 								            "description": "\"SpaCy is awesome for NLP! It’s easy to use, has widespread adoption, is open source, and integrates the latest language models. Ines Montani and Matthew Honnibal (core developers of spaCy and co-founders of Explosion) join us to discuss the history of the project, its capabilities, and the latest trends in NLP. We also dig into the practicalities of taking NLP workflows to production. You don’t want to miss this episode!\"",
 								            "thumb": "https://i.imgur.com/jn8Bcdw.png",
 								            "url": "https://changelog.com/practicalai/68",
 								            "author": "Daniel Whitenack & Chris Benson",
 								            "author_links": {
 								                "website": "https://changelog.com/practicalai",
 								                "twitter": "https://twitter.com/PracticalAIFM"
 								            },
 								            "category": ["podcasts"]
 								        },
-												Update universe and display of videos [ci skip]

											
										
										
											2020-05-21 22:54:23 +03:00
+								        {
 								            "type": "education",
 								            "id": "video-entity-linking",
 								            "title": "Training a custom entity linking mode with spaCy",
 								            "author": "Sofie Van Landeghem",
 								            "author_links": {
 								                "twitter": "OxyKodit",
 								                "github": "svlandeg"
 								            },
 								            "youtube": "8u57WSXVpmw",
 								            "category": ["videos"]
 								        },
-												Added Adam project to spaCy Universe (#2275)

* Added 5hirish to contributors

* Added Adam Qas Project to spaCy Universe

* Remove $ from code example

											
										
										
											2018-04-30 23:25:01 +03:00
+								        {
 								            "id": "adam_qas",
 								            "title": "ADAM: Question Answering System",
 								            "slogan": "A question answering system that extracts answers from Wikipedia to questions posed in natural language.",
 								            "github": "5hirish/adam_qas",
 								            "pip": "qas",
 								            "code_example": [
 								                "git clone https://github.com/5hirish/adam_qas.git",
 								                "cd adam_qas",
 								                "pip install -r requirements.txt",
 								                "python -m qas.adam 'When was linux kernel version 4.0 released ?'"
 								            ],
 								            "code_language": "bash",
 								            "thumb": "https://shirishkadam.files.wordpress.com/2018/04/mini_alleviate.png",
 								            "author": "Shirish Kadam",
 								            "author_links": {
 								                "twitter": "5hirish",
 								                "github": "5hirish",
 								                "website": "https://shirishkadam.com/"
 								            },
 								            "category": ["standalone"],
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								            "tags": ["question-answering", "elasticsearch"]
-												Add self-attentive-parser to universe (see #59)

											
										
										
											2018-05-30 14:31:28 +03:00
+								        },
-												Add EpiTator to Universe (#2429)


											
										
										
											2018-06-11 01:24:13 +03:00
+								        {
 								            "id": "epitator",
 								            "title": "EpiTator",
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								            "thumb": "https://i.imgur.com/NYFY1Km.jpg",
-												Add EpiTator to Universe (#2429)


											
										
										
											2018-06-11 01:24:13 +03:00
+								            "slogan": "Extracts case counts, resolved location/species/disease names, date ranges and more",
-												Adjust formatting [ci skip]

											
										
										
											2018-06-11 01:29:13 +03:00
+								            "description": "EcoHealth Alliance uses EpiTator to catalog the what, where and when of infectious disease case counts reported in online news. Each of these aspects is extracted using independent annotators than can be applied to other domains. EpiTator organizes annotations by creating \"AnnoTiers\" for each type. AnnoTiers have methods for manipulating, combining and searching annotations. For instance, the `with_following_spans_from()` method can be used to create a new tier that combines a tier of one type (such as numbers), with another (say, kitchenware). The resulting tier will contain all the phrases in the document that match that pattern, like \"5 plates\" or \"2 cups.\"\n\nAnother commonly used method is `group_spans_by_containing_span()` which can be used to do things like find all the spaCy tokens in all the GeoNames a document mentions. spaCy tokens, named entities, sentences and noun chunks are exposed through the spaCy annotator which will create a AnnoTier for each. These are basis of many of the other annotators. EpiTator also includes an annotator for extracting tables embedded in free text articles. Another neat feature is that the lexicons used for entity resolution are all stored in an embedded sqlite database so there is no need to run any external services in order to use EpiTator.",
-												Add EpiTator to Universe (#2429)


											
										
										
											2018-06-11 01:24:13 +03:00
+								            "url": "https://github.com/ecohealthalliance/EpiTator",
 								            "github": "ecohealthalliance/EpiTator",
 								            "pip": "EpiTator",
 								            "code_example": [
 								                "from epitator.annotator import AnnoDoc",
 								                "from epitator.geoname_annotator import GeonameAnnotator",
-												Adjust formatting [ci skip]

											
										
										
											2018-06-11 01:29:13 +03:00
+								                "",
-												Add EpiTator to Universe (#2429)


											
										
										
											2018-06-11 01:24:13 +03:00
+								                "doc = AnnoDoc('Where is Chiang Mai?')",
 								                "geoname_annotier = doc.require_tiers('geonames', via=GeonameAnnotator)",
 								                "geoname = geoname_annotier.spans[0].metadata['geoname']",
 								                "geoname['name']",
 								                "# = 'Chiang Mai'",
 								                "geoname['geonameid']",
 								                "# = '1153671'",
 								                "geoname['latitude']",
 								                "# = 18.79038",
 								                "geoname['longitude']",
 								                "# = 98.98468",
 								                "",
 								                "from epitator.spacy_annotator import SpacyAnnotator",
 								                "spacy_token_tier = doc.require_tiers('spacy.tokens', via=SpacyAnnotator)",
 								                "list(geoname_annotier.group_spans_by_containing_span(spacy_token_tier))",
 								                "# = [(AnnoSpan(9-19, Chiang Mai), [AnnoSpan(9-15, Chiang), AnnoSpan(16-19, Mai)])]"
 								            ],
 								            "author": "EcoHealth Alliance",
 								            "author_links": {
 								                "github": "ecohealthalliance",
 								                "website": " https://ecohealthalliance.org/"
 								            },
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								            "category": ["scientific", "standalone"]
-												Add EpiTator to Universe (#2429)


											
										
										
											2018-06-11 01:24:13 +03:00
+								        },
-												Add self-attentive-parser to universe (see #59)

											
										
										
											2018-05-30 14:31:28 +03:00
+								        {
 								            "id": "self-attentive-parser",
 								            "title": "Berkeley Neural Parser",
 								            "slogan": "Constituency Parsing with a Self-Attentive Encoder (ACL 2018)",
 								            "description": "A Python implementation of the parsers described in *\"Constituency Parsing with a Self-Attentive Encoder\"* from ACL 2018.",
 								            "url": "https://arxiv.org/abs/1805.01052",
 								            "github": "nikitakit/self-attentive-parser",
 								            "pip": "benepar",
 								            "code_example": [
 								                "import spacy",
 								                "from benepar.spacy_plugin import BeneparComponent",
-												Update formatting [ci skip]

											
										
										
											2018-05-30 14:32:49 +03:00
+								                "",
-												Add self-attentive-parser to universe (see #59)

											
										
										
											2018-05-30 14:31:28 +03:00
+								                "nlp = spacy.load('en')",
 								                "nlp.add_pipe(BeneparComponent('benepar_en'))",
-												Remove u-strings and fix formatting [ci skip]

											
										
										
											2019-09-12 17:11:15 +03:00
+								                "doc = nlp('The time for action is now. It's never too late to do something.')",
-												Add self-attentive-parser to universe (see #59)

											
										
										
											2018-05-30 14:31:28 +03:00
+								                "sent = list(doc.sents)[0]",
 								                "print(sent._.parse_string)",
 								                "# (S (NP (NP (DT The) (NN time)) (PP (IN for) (NP (NN action)))) (VP (VBZ is) (ADVP (RB now))) (. .))",
 								                "print(sent._.labels)",
 								                "# ('S',)",
 								                "print(list(sent._.children)[0])",
 								                "# The time for action"
 								            ],
 								            "author": "Nikita Kitaev",
 								            "author_links": {
 								                "github": "nikitakit",
 								                "website": " http://kitaev.io"
 								            },
 								            "category": ["research", "pipeline"]
-												Add ExcelCy into Universe list (#2572)

Hi guys,

This is my first spaCy extension. I am excited to able to do this. Please do let me know if there is any suggestions or modifications I need to do. Feel free to use/contribute the repo that I made.

## Description
ExcelCy is a SpaCy toolkit to help improve the data training experiences. It provides easy annotation using Excel file format. It has helper to pre-train entity annotation with phrase and regex matcher pipe.

### Types of change
Update to Universe list in website.

## Checklist
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2018-07-19 20:28:33 +03:00
+								        },
 								        {
 								            "id": "excelcy",
 								            "title": "ExcelCy",
-												Fix typo (#2579)

Update slogan, desc and code snippet to latest version
											
										
										
											2018-07-24 23:47:33 +03:00
+								            "slogan": "Excel Integration with spaCy. Training NER using XLSX from PDF, DOCX, PPT, PNG or JPG.",
 								            "description": "ExcelCy is a toolkit to integrate Excel to spaCy NLP training experiences. Training NER using XLSX from PDF, DOCX, PPT, PNG or JPG. ExcelCy has pipeline to match Entity with PhraseMatcher or Matcher in regular expression.",
-												Add ExcelCy into Universe list (#2572)

Hi guys,

This is my first spaCy extension. I am excited to able to do this. Please do let me know if there is any suggestions or modifications I need to do. Feel free to use/contribute the repo that I made.

## Description
ExcelCy is a SpaCy toolkit to help improve the data training experiences. It provides easy annotation using Excel file format. It has helper to pre-train entity annotation with phrase and regex matcher pipe.

### Types of change
Update to Universe list in website.

## Checklist
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2018-07-19 20:28:33 +03:00
+								            "url": "https://github.com/kororo/excelcy",
 								            "github": "kororo/excelcy",
 								            "pip": "excelcy",
 								            "code_example": [
 								                "from excelcy import ExcelCy",
-												Fix typo (#2579)

Update slogan, desc and code snippet to latest version
											
										
										
											2018-07-24 23:47:33 +03:00
+								                "# collect sentences, annotate Entities and train NER using spaCy",
 								                "excelcy = ExcelCy.execute(file_path='https://github.com/kororo/excelcy/raw/master/tests/data/test_data_01.xlsx')",
 								                "# use the nlp object as per spaCy API",
 								                "doc = excelcy.nlp('Google rebrands its business apps')",
 								                "# or save it for faster bootstrap for application",
 								                "excelcy.nlp.to_disk('/model')"
-												Add ExcelCy into Universe list (#2572)

Hi guys,

This is my first spaCy extension. I am excited to able to do this. Please do let me know if there is any suggestions or modifications I need to do. Feel free to use/contribute the repo that I made.

## Description
ExcelCy is a SpaCy toolkit to help improve the data training experiences. It provides easy annotation using Excel file format. It has helper to pre-train entity annotation with phrase and regex matcher pipe.

### Types of change
Update to Universe list in website.

## Checklist
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2018-07-19 20:28:33 +03:00
+								            ],
 								            "author": "Robertus Johansyah",
 								            "author_links": {
 								                "github": "kororo"
 								            },
 								            "category": ["training"],
 								            "tags": ["excel"]
-												Update universe [ci skip]

											
										
										
											2018-08-02 18:33:08 +03:00
+								        },
 								        {
 								            "id": "spacy-graphql",
 								            "title": "spacy-graphql",
 								            "slogan": "Query spaCy's linguistic annotations using GraphQL",
 								            "github": "ines/spacy-graphql",
 								            "description": "A very simple and experimental app that lets you query spaCy's linguistic annotations using [GraphQL](https://graphql.org/). The API currently supports most token attributes, named entities, sentences and text categories (if available as `doc.cats`, i.e. if you added a text classifier to a model). The `meta` field will return the model meta data. Models are only loaded once and kept in memory.",
 								            "url": "https://explosion.ai/demos/spacy-graphql",
 								            "category": ["apis"],
 								            "tags": ["graphql"],
 								            "thumb": "https://i.imgur.com/xC7zpTO.png",
 								            "code_example": [
 								                "{",
 								                "  nlp(text: \"Zuckerberg is the CEO of Facebook.\", model: \"en_core_web_sm\") {",
 								                "    meta {",
 								                "      lang",
 								                "      description",
 								                "    }",
 								                "    doc {",
 								                "      text",
 								                "      tokens {",
 								                "        text",
 								                "        pos_",
 								                "      }",
 								                "      ents {",
 								                "        text",
 								                "        label_",
 								                "      }",
 								                "    }",
 								                "  }",
 								                "}"
 								            ],
 								            "code_language": "json",
 								            "author": "Ines Montani",
 								            "author_links": {
 								                "twitter": "_inesmontani",
 								                "github": "ines",
 								                "website": "https://ines.io"
 								            }
-												Add spacy-js to universe [ci-skip]

											
										
										
											2018-11-06 14:45:03 +03:00
+								        },
 								        {
 								            "id": "spacy-js",
 								            "title": "spacy-js",
 								            "slogan": "JavaScript API for spaCy with Python REST API",
 								            "github": "ines/spacy-js",
 								            "description": "JavaScript interface for accessing linguistic annotations provided by spaCy. This project is mostly experimental and was developed for fun to play around with different ways of mimicking spaCy's Python API.\n\nThe results will still be computed in Python and made available via a REST API. The JavaScript API resembles spaCy's Python API as closely as possible (with a few exceptions, as the values are all pre-computed and it's tricky to express complex recursive relationships).",
 								            "code_language": "javascript",
 								            "code_example": [
 								                "const spacy = require('spacy');",
 								                "",
 								                "(async function() {",
 								                "    const nlp = spacy.load('en_core_web_sm');",
 								                "    const doc = await nlp('This is a text about Facebook.');",
 								                "    for (let ent of doc.ents) {",
 								                "        console.log(ent.text, ent.label);",
 								                "    }",
 								                "    for (let token of doc) {",
 								                "        console.log(token.text, token.pos, token.head.text);",
 								                "    }",
 								                "})();"
 								            ],
 								            "author": "Ines Montani",
 								            "author_links": {
 								                "twitter": "_inesmontani",
 								                "github": "ines",
 								                "website": "https://ines.io"
 								            },
 								            "category": ["nonpython"],
 								            "tags": ["javascript"]
-												Add spacy-raspberry to universe (closes #2889)

											
										
										
											2018-11-06 14:45:50 +03:00
+								        },
 								        {
 								            "id": "spacy-raspberry",
 								            "title": "spacy-raspberry",
 								            "slogan": "64bit Raspberry Pi image for spaCy and neuralcoref",
 								            "github": "boehm-e/spacy-raspberry",
 								            "thumb": "https://i.imgur.com/VCJMrE6.png",
 								            "image": "https://raw.githubusercontent.com/boehm-e/spacy-raspberry/master/imgs/preview.png",
 								            "author": "Erwan Boehm",
 								            "author_links": {
 								                "github": "boehm-e"
 								            },
 								            "category": ["apis"],
 								            "tags": ["raspberrypi"]
-												Include universe spec for spacy-wordnet component (#2919)

* feat: include universe spec for spacy-wordnet component

* chore: include spaCy contributor agreement

											
										
										
											2018-11-14 01:54:46 +03:00
+								        },
 								        {
 								            "id": "spacy-wordnet",
-												Update universe [ci skip]

											
										
										
											2018-11-26 16:16:22 +03:00
+								            "title": "spacy-wordnet",
-												Minor formatting changes [ci skip]

											
										
										
											2018-11-14 01:59:59 +03:00
+								            "slogan": "WordNet meets spaCy",
 								            "description": "`spacy-wordnet` creates annotations that easily allow the use of WordNet and [WordNet Domains](http://wndomains.fbk.eu/) by using the [NLTK WordNet interface](http://www.nltk.org/howto/wordnet.html)",
-												Include universe spec for spacy-wordnet component (#2919)

* feat: include universe spec for spacy-wordnet component

* chore: include spaCy contributor agreement

											
										
										
											2018-11-14 01:54:46 +03:00
+								            "github": "recognai/spacy-wordnet",
 								            "tags": ["wordnet", "synsets"],
-												Fix image [ci skip]

Twitter URL doesn't work on live site

											
										
										
											2018-11-14 03:01:33 +03:00
+								            "thumb": "https://i.imgur.com/3y2uPUv.jpg",
-												Include universe spec for spacy-wordnet component (#2919)

* feat: include universe spec for spacy-wordnet component

* chore: include spaCy contributor agreement

											
										
										
											2018-11-14 01:54:46 +03:00
+								            "code_example": [
 								                "import spacy",
-												typo fix for wordnet_annotator (#4326)


											
										
										
											2019-09-27 12:52:53 +03:00
+								                "from spacy_wordnet.wordnet_annotator import WordnetAnnotator ",
-												Include universe spec for spacy-wordnet component (#2919)

* feat: include universe spec for spacy-wordnet component

* chore: include spaCy contributor agreement

											
										
										
											2018-11-14 01:54:46 +03:00
+								                "",
 								                "# Load an spacy model (supported models are \"es\" and \"en\") ",
 								                "nlp = spacy.load('en')",
 								                "nlp.add_pipe(WordnetAnnotator(nlp.lang), after='tagger')",
 								                "token = nlp('prices')[0]",
 								                "",
 								                "# wordnet object link spacy token with nltk wordnet interface by giving acces to",
 								                "# synsets and lemmas ",
 								                "token._.wordnet.synsets()",
 								                "token._.wordnet.lemmas()",
 								                "",
 								                "# And automatically tags with wordnet domains",
 								                "token._.wordnet.wordnet_domains()"
 								            ],
 								            "author": "recognai",
 								            "author_links": {
 								                "github": "recognai",
 								                "twitter": "recogn_ai",
 								                "website": "https://recogn.ai"
 								            },
 								            "category": ["pipeline"]
-												Update universe [ci skip]

											
										
										
											2018-11-26 16:16:22 +03:00
+								        },
 								        {
-												Updated spacy_conll information (#3158)


											
										
										
											2019-01-16 15:46:16 +03:00
+								            "id": "spacy-conll",
 								            "title": "spacy_conll",
-												Changes to spacy_conll in universe (#4914)

* Update information on spacy_conll

* Typo fix

											
										
										
											2020-01-16 03:56:39 +03:00
+								            "slogan": "Parse text with spaCy and gets its output in CoNLL-U format",
 								            "description": "This module allows you to parse a text to CoNLL-U format. It contains a pipeline component for spaCy that adds CoNLL-U properties to a Doc and its sentences. It can also be used as a command-line tool.",
-												Update universe [ci skip]

											
										
										
											2018-11-26 16:16:22 +03:00
+								            "code_example": [
-												Changes to spacy_conll in universe (#4914)

* Update information on spacy_conll

* Typo fix

											
										
										
											2020-01-16 03:56:39 +03:00
+								                "import spacy",
 								                "from spacy_conll import ConllFormatter",
-												Updated spacy_conll information (#3158)


											
										
										
											2019-01-16 15:46:16 +03:00
+								                "",
-												Changes to spacy_conll in universe (#4914)

* Update information on spacy_conll

* Typo fix

											
										
										
											2020-01-16 03:56:39 +03:00
+								                "nlp = spacy.load('en')",
 								                "conllformatter = ConllFormatter(nlp)",
 								                "nlp.add_pipe(conllformatter, after='parser')",
 								                "doc = nlp('I like cookies. Do you?')",
 								                "conll = doc._.conll",
-												Auto-format and fix image [ci skip]

											
										
										
											2020-02-23 15:56:50 +03:00
+								                "print(doc._.conll_str_headers)",
-												Changes to spacy_conll in universe (#4914)

* Update information on spacy_conll

* Typo fix

											
										
										
											2020-01-16 03:56:39 +03:00
+								                "print(doc._.conll_str)"
-												Update universe [ci skip]

											
										
										
											2018-11-26 16:16:22 +03:00
+								            ],
-												Updated spacy_conll information (#3158)


											
										
										
											2019-01-16 15:46:16 +03:00
+								            "code_language": "python",
 								            "author": "Bram Vanroy",
-												Update universe [ci skip]

											
										
										
											2018-11-26 16:16:22 +03:00
+								            "author_links": {
-												Auto-format and fix image [ci skip]

											
										
										
											2020-02-23 15:56:50 +03:00
+								                "github": "BramVanroy",
-												Changes to spacy_conll in universe (#4914)

* Update information on spacy_conll

* Typo fix

											
										
										
											2020-01-16 03:56:39 +03:00
+								                "twitter": "BramVanroy",
-												Updated spacy_conll information (#3158)


											
										
										
											2019-01-16 15:46:16 +03:00
+								                "website": "https://bramvanroy.be"
-												Update universe [ci skip]

											
										
										
											2018-11-26 16:16:22 +03:00
+								            },
-												Updated spacy_conll information (#3158)


											
										
										
											2019-01-16 15:46:16 +03:00
+								            "github": "BramVanroy/spacy_conll",
-												Changes to spacy_conll in universe (#4914)

* Update information on spacy_conll

* Typo fix

											
										
										
											2020-01-16 03:56:39 +03:00
+								            "category": ["standalone", "pipeline"],
 								            "tags": ["linguistics", "computational linguistics", "conll"]
-												added spacy-langdetect to universe.json (#3266)


											
										
										
											2019-02-12 20:04:38 +03:00
+								        },
 								        {
 								            "id": "spacy-langdetect",
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								            "title": "spacy-langdetect",
-												added spacy-langdetect to universe.json (#3266)


											
										
										
											2019-02-12 20:04:38 +03:00
+								            "slogan": "A fully customizable language detection pipeline for spaCy",
 								            "description": "This module allows you to add language detection capabilites to your spaCy pipeline. Also supports custom language detectors!",
 								            "pip": "spacy-langdetect",
 								            "code_example": [
 								                "import spacy",
 								                "from spacy_langdetect import LanguageDetector",
 								                "nlp = spacy.load('en')",
 								                "nlp.add_pipe(LanguageDetector(), name='language_detector', last=True)",
 								                "text = 'This is an english text.'",
-												Update universe.json
											
										
										
											2019-02-12 20:05:51 +03:00
+								                "doc = nlp(text)",
-												added spacy-langdetect to universe.json (#3266)


											
										
										
											2019-02-12 20:04:38 +03:00
+								                "# document level language detection. Think of it like average language of the document!",
 								                "print(doc._.language)",
 								                "# sentence level language detection",
 								                "for sent in doc.sents:",
 								                "   print(sent, sent._.language)"
 								            ],
 								            "code_language": "python",
 								            "author": "Abhijit Balaji",
 								            "author_links": {
 								                "github": "Abhijit-2592",
 								                "website": "https://abhijit-2592.github.io/"
 								            },
 								            "github": "Abhijit-2592/spacy-langdetect",
 								            "category": ["pipeline"],
 								            "tags": ["language-detection"]
-												Added Ludwig among the projects (#3548) [ci skip]

* Added Ludwig among the projects

* Create w4nderlust.md

* Add Uber to logo wall

											
										
										
											2019-04-07 14:01:26 +03:00
+								        },
 								        {
 								            "id": "ludwig",
 								            "title": "Ludwig",
 								            "slogan": "A code-free deep learning toolbox",
 								            "description": "Ludwig makes it easy to build deep learning models for many applications, including NLP ones. It uses spaCy for tokenizing text in different languages.",
 								            "pip": "ludwig",
 								            "github": "uber/ludwig",
 								            "thumb": "https://i.imgur.com/j1sORgD.png",
 								            "url": "http://ludwig.ai",
 								            "author": "Piero Molino @ Uber AI",
 								            "author_links": {
 								                "github": "w4nderlust",
 								                "twitter": "w4nderlus7",
 								                "website": "http://w4nderlu.st"
 								            },
 								            "category": ["standalone", "research"]
-												Added project gracyql to Universe (#3570) (resolves #3568)

As discussed with Ines in https://github.com/explosion/spaCy/issues/3568 , adding a new project proposal for the community in SpaCy Universe website

GracyQL a tiny graphql wrapper around spacy using graphene and starlette.

## Description
Change only in universe.json file to add a new project

### Types of change
New project reference in Universe

## Checklist
- [x ] I have submitted the spaCy Contributor Agreement.
- [x ] I ran the tests, and all new and existing tests passed.
- [ x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-04-10 18:54:42 +03:00
+								        },
 								        {
-												Update universe.json

A bot powered by Clarifai Predict API and spaCy. Can be found in Telegram messenger at @pic2phrase_bot
											
										
										
											2020-03-21 18:39:15 +03:00
+								            "id": "pic2phrase_bot",
 								            "title": "pic2phrase_bot: Photo Description Generator",
 								            "slogan": "A bot that generates descriptions of submitted photos, in a human-like manner.",
-												Update website/meta/universe.json
											
										
										
											2020-04-29 13:51:37 +03:00
+								            "description": "pic2phrase_bot runs inside Telegram messenger and can be used to generate a phrase describing a submitted photo, employing computer vision, web scraping, and syntactic dependency analysis powered by spaCy.",
-												Update universe.json
											
										
										
											2020-04-03 19:10:03 +03:00
+								            "thumb": "https://i.imgur.com/ggVI02O.jpg",
 								            "image": "https://i.imgur.com/z1yhWQR.jpg",
-												Remove u string and auto-format [ci skip]

											
										
										
											2020-04-29 13:54:57 +03:00
+								            "url": "https://telegram.me/pic2phrase_bot",
-												Update universe.json

A bot powered by Clarifai Predict API and spaCy. Can be found in Telegram messenger at @pic2phrase_bot
											
										
										
											2020-03-21 18:39:15 +03:00
+								            "author": "Yuli Vasiliev",
 								            "author_links": {
-												Remove u string and auto-format [ci skip]

											
										
										
											2020-04-29 13:54:57 +03:00
+								                "twitter": "VasilievYuli"
-												Update universe.json

A bot powered by Clarifai Predict API and spaCy. Can be found in Telegram messenger at @pic2phrase_bot
											
										
										
											2020-03-21 18:39:15 +03:00
+								            },
-												Update website/meta/universe.json
											
										
										
											2020-04-29 13:51:44 +03:00
+								            "category": ["standalone", "conversational"]
-												Update universe.json

A bot powered by Clarifai Predict API and spaCy. Can be found in Telegram messenger at @pic2phrase_bot
											
										
										
											2020-03-21 18:39:15 +03:00
+								        },
-												Added project gracyql to Universe (#3570) (resolves #3568)

As discussed with Ines in https://github.com/explosion/spaCy/issues/3568 , adding a new project proposal for the community in SpaCy Universe website

GracyQL a tiny graphql wrapper around spacy using graphene and starlette.

## Description
Change only in universe.json file to add a new project

### Types of change
New project reference in Universe

## Checklist
- [x ] I have submitted the spaCy Contributor Agreement.
- [x ] I ran the tests, and all new and existing tests passed.
- [ x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-04-10 18:54:42 +03:00
+								        {
 								            "id": "gracyql",
 								            "title": "gracyql",
 								            "slogan": "A thin GraphQL wrapper around spacy",
 								            "github": "oterrier/gracyql",
 								            "description": "An example of a basic [Starlette](https://github.com/encode/starlette) app using [spaCy](https://github.com/explosion/spaCy) and [Graphene](https://github.com/graphql-python/graphene). The main goal is to be able to use the amazing power of spaCy from other languages and retrieve only the information you need thanks to the GraphQL query definition. The GraphQL schema tries to mimic as much as possible the original spaCy API with classes Doc, Span and Token.",
 								            "thumb": "https://i.imgur.com/xC7zpTO.png",
 								            "category": ["apis"],
 								            "tags": ["graphql"],
 								            "code_example": [
 								                "query ParserDisabledQuery {",
 								                "  nlp(model: \"en\", disable: [\"parser\", \"ner\"]) {",
 								                "    doc(text: \"I live in Grenoble, France\") {",
 								                "      text",
 								                "      tokens {",
 								                "        id",
 								                "        pos",
 								                "        lemma",
 								                "        dep",
 								                "      }",
 								                "      ents {",
 								                "        start",
 								                "        end",
 								                "        label",
 								                "      }",
 								                "    }",
 								                "  }",
 								                "}"
 								            ],
 								            "code_language": "json",
 								            "author": "Olivier Terrier",
 								            "author_links": {
 								                "github": "oterrier"
 								            }
-												Update Universe Website for pyInflect (#3641)


											
										
										
											2019-04-26 14:17:36 +03:00
+								        },
 								        {
 								            "id": "pyInflect",
-												Update universe.json [ci skip]

											
										
										
											2019-08-28 14:45:06 +03:00
+								            "slogan": "A Python module for word inflections",
-												Update Universe Website for pyInflect (#3641)


											
										
										
											2019-04-26 14:17:36 +03:00
+								            "description": "This package uses the [spaCy 2.0 extensions](https://spacy.io/usage/processing-pipelines#extensions) to add word inflections to the system.",
 								            "github": "bjascob/pyInflect",
 								            "pip": "pyinflect",
 								            "code_example": [
 								                "import spacy",
 								                "import pyinflect",
 								                "",
 								                "nlp = spacy.load('en_core_web_sm')",
 								                "doc = nlp('This is an example.')",
 								                "doc[3].tag_                # NN",
 								                "doc[3]._.inflect('NNS')    # examples"
 								            ],
 								            "author": "Brad Jascob",
 								            "author_links": {
 								                "github": "bjascob"
 								            },
 								            "category": ["pipeline"],
 								            "tags": ["inflection"]
-												Update universe.json (#3653) [ci skip]

* Update universe.json

* Update universe.json

											
										
										
											2019-05-03 12:50:12 +03:00
+								        },
-												Update universe.json [ci skip]

											
										
										
											2019-08-28 14:45:06 +03:00
+								        {
 								            "id": "lemminflect",
 								            "slogan": "A Python module for English lemmatization and inflection",
 								            "description": "LemmInflect uses a dictionary approach to lemmatize English words and inflect them into forms specified by a user supplied [Universal Dependencies](https://universaldependencies.org/u/pos/) or [Penn Treebank](https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html) tag.  The library works with out-of-vocabulary (OOV) words by applying neural network techniques to classify word forms and choose the appropriate morphing rules. The system acts as a standalone module or as an extension to spaCy.",
 								            "github": "bjascob/LemmInflect",
 								            "pip": "lemminflect",
 								            "thumb": "https://raw.githubusercontent.com/bjascob/LemmInflect/master/docs/img/icons8-citrus-80.png",
 								            "code_example": [
 								                "import spacy",
 								                "import lemminflect",
 								                "",
 								                "nlp = spacy.load('en_core_web_sm')",
 								                "doc = nlp('I am testing this example.')",
 								                "doc[2]._.lemma()         # 'test'",
 								                "doc[4]._.inflect('NNS')  # 'examples'"
 								            ],
 								            "author": "Brad Jascob",
 								            "author_links": {
 								                "github": "bjascob"
 								            },
 								            "category": ["pipeline"],
 								            "tags": ["inflection", "lemmatizer"]
 								        },
-												Add entry for Blackstone in universe.json (#4101)

* Add entry for Blackstone in universe.json

Add an entry for the Blackstone project. Checked JSON is valid.

* Create ICLRandD.md

* Fix indentation (tabs to spaces)

It looks like during validation, the JSON file automatically changed spaces to tabs. This caused the diff to show *everything* as changed, which is obviously not true. This hopefully fixes that.

* Try to fix formatting for diff

* Fix diff


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-08-09 18:16:51 +03:00
+								        {
 								            "id": "blackstone",
 								            "title": "Blackstone",
-												Update universe.json [ci skip]

											
										
										
											2019-08-09 18:42:37 +03:00
+								            "slogan": "A spaCy pipeline and model for NLP on unstructured legal text",
 								            "description": "Blackstone is a spaCy model and library for processing long-form, unstructured legal text. Blackstone is an experimental research project from the [Incorporated Council of Law Reporting for England and Wales'](https://iclr.co.uk/) research lab, [ICLR&D](https://research.iclr.co.uk/).",
-												Add entry for Blackstone in universe.json (#4101)

* Add entry for Blackstone in universe.json

Add an entry for the Blackstone project. Checked JSON is valid.

* Create ICLRandD.md

* Fix indentation (tabs to spaces)

It looks like during validation, the JSON file automatically changed spaces to tabs. This caused the diff to show *everything* as changed, which is obviously not true. This hopefully fixes that.

* Try to fix formatting for diff

* Fix diff


Co-authored-by: Ines Montani <ines@ines.io>
											
										
										
											2019-08-09 18:16:51 +03:00
+								            "github": "ICLRandD/Blackstone",
 								            "pip": "blackstone",
 								            "thumb": "https://iclr.s3-eu-west-1.amazonaws.com/assets/iclrand/Blackstone/thumb.png",
 								            "url": "https://research.iclr.co.uk",
 								            "author": "ICLR&D",
 								            "author_links": {
 								                "github": "ICLRandD",
 								                "twitter": "ICLRanD",
 								                "website": "https://research.iclr.co.uk"
 								            },
 								            "category": ["scientific", "models", "research"]
 								        },
-												Update universe.json (#3653) [ci skip]

* Update universe.json

* Update universe.json

											
										
										
											2019-05-03 12:50:12 +03:00
+								        {
 								            "id": "NGym",
-												Adjust wording and formatting [ci skip]

											
										
										
											2019-05-03 13:00:31 +03:00
+								            "title": "NeuralGym",
 								            "slogan": "A little Windows GUI for training models with spaCy",
 								            "description": "NeuralGym is a Python application for Windows with a graphical user interface to train models with spaCy. Run the application, select an output folder, a training data file in spaCy's data format, a spaCy model or blank model and press 'Start'.",
-												Update universe.json (#3653) [ci skip]

* Update universe.json

* Update universe.json

											
										
										
											2019-05-03 12:50:12 +03:00
+								            "github": "d5555/NeuralGym",
 								            "url": "https://github.com/d5555/NeuralGym",
 								            "image": "https://github.com/d5555/NeuralGym/raw/master/NGym.png",
 								            "thumb": "https://github.com/d5555/NeuralGym/raw/master/NGym/web.png",
 								            "author": "d5555",
 								            "category": ["training"],
-												Adjust wording and formatting [ci skip]

											
										
										
											2019-05-03 13:00:31 +03:00
+								            "tags": ["windows"]
-												Request to include Holmes in spaCy Universe (#3685)

* Request to add Holmes to spaCy Universe

Dear spaCy team, I would be grateful if you would consider my Python library Holmes for inclusion in the spaCy Universe. Holmes transforms the syntactic structures delivered by spaCy into semantic structures that, together with various other techniques including ontological matching and word embeddings, serve as the basis for information extraction. Holmes supports several use cases including chatbot, structured search, topic matching and supervised document classification. I had the basic idea for Holmes around 15 years ago and now spaCy has made it possible to build an implementation that is stable and fast enough to actually be of use - thank you! At present Holmes supports English and German (I am based in Munich) but could easily be extended to support any other language with a spaCy model.

* Added

											
										
										
											2019-05-08 03:42:03 +03:00
+								        },
 								        {
 								            "id": "holmes",
 								            "title": "Holmes",
 								            "slogan": "Information extraction from English and German texts based on predicate logic",
 								            "github": "msg-systems/holmes-extractor",
 								            "url": "https://github.com/msg-systems/holmes-extractor",
-												Update to Holmes Universe entry (#4679)

* Updated Universe entry for Holmes

* Correction

* Updated model name

* Updated wording

											
										
										
											2019-11-21 18:23:24 +03:00
+								            "description": "Holmes is a Python 3 library that supports a number of use cases involving information extraction from English and German texts, including chatbot, structural extraction, topic matching and supervised document classification. There is a [website demonstrating intelligent search based on topic matching](https://holmes-demo.xt.msg.team).",
-												Request to include Holmes in spaCy Universe (#3685)

* Request to add Holmes to spaCy Universe

Dear spaCy team, I would be grateful if you would consider my Python library Holmes for inclusion in the spaCy Universe. Holmes transforms the syntactic structures delivered by spaCy into semantic structures that, together with various other techniques including ontological matching and word embeddings, serve as the basis for information extraction. Holmes supports several use cases including chatbot, structured search, topic matching and supervised document classification. I had the basic idea for Holmes around 15 years ago and now spaCy has made it possible to build an implementation that is stable and fast enough to actually be of use - thank you! At present Holmes supports English and German (I am based in Munich) but could easily be extended to support any other language with a spaCy model.

* Added

											
										
										
											2019-05-08 03:42:03 +03:00
+								            "pip": "holmes-extractor",
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								            "category": ["conversational", "standalone"],
-												Auto-format

											
										
										
											2019-05-11 18:48:07 +03:00
+								            "tags": ["chatbots", "text-processing"],
-												Update to Holmes Universe entry (#4679)

* Updated Universe entry for Holmes

* Correction

* Updated model name

* Updated wording

											
										
										
											2019-11-21 18:23:24 +03:00
+								            "thumb": "https://raw.githubusercontent.com/msg-systems/holmes-extractor/master/docs/holmes_thumbnail.png",
-												Request to include Holmes in spaCy Universe (#3685)

* Request to add Holmes to spaCy Universe

Dear spaCy team, I would be grateful if you would consider my Python library Holmes for inclusion in the spaCy Universe. Holmes transforms the syntactic structures delivered by spaCy into semantic structures that, together with various other techniques including ontological matching and word embeddings, serve as the basis for information extraction. Holmes supports several use cases including chatbot, structured search, topic matching and supervised document classification. I had the basic idea for Holmes around 15 years ago and now spaCy has made it possible to build an implementation that is stable and fast enough to actually be of use - thank you! At present Holmes supports English and German (I am based in Munich) but could easily be extended to support any other language with a spaCy model.

* Added

											
										
										
											2019-05-08 03:42:03 +03:00
+								            "code_example": [
 								                "import holmes_extractor as holmes",
-												Update to Holmes Universe entry (#4679)

* Updated Universe entry for Holmes

* Correction

* Updated model name

* Updated wording

											
										
										
											2019-11-21 18:23:24 +03:00
+								                "holmes_manager = holmes.Manager(model='en_core_web_lg')",
-												Request to include Holmes in spaCy Universe (#3685)

* Request to add Holmes to spaCy Universe

Dear spaCy team, I would be grateful if you would consider my Python library Holmes for inclusion in the spaCy Universe. Holmes transforms the syntactic structures delivered by spaCy into semantic structures that, together with various other techniques including ontological matching and word embeddings, serve as the basis for information extraction. Holmes supports several use cases including chatbot, structured search, topic matching and supervised document classification. I had the basic idea for Holmes around 15 years ago and now spaCy has made it possible to build an implementation that is stable and fast enough to actually be of use - thank you! At present Holmes supports English and German (I am based in Munich) but could easily be extended to support any other language with a spaCy model.

* Added

											
										
										
											2019-05-08 03:42:03 +03:00
+								                "holmes_manager.register_search_phrase('A big dog chases a cat')",
 								                "holmes_manager.start_chatbot_mode_console()"
 								            ],
 								            "author": "Richard Paul Hudson",
 								            "author_links": {
 								                "github": "richardpaulhudson"
 								            }
-												Update universe.json [ci skip]

											
										
										
											2019-08-05 15:30:07 +03:00
+								        },
 								        {
-												Update transformer model details [ci skip]

											
										
										
											2019-10-08 16:39:38 +03:00
+								            "id": "spacy-transformers",
 								            "title": "spacy-transformers",
-												Use consistent spelling

											
										
										
											2019-10-02 11:37:39 +03:00
+								            "slogan": "spaCy pipelines for pretrained BERT, XLNet and GPT-2",
-												Update transformer model details [ci skip]

											
										
										
											2019-10-08 16:39:38 +03:00
+								            "description": "This package provides spaCy model pipelines that wrap [Hugging Face's `transformers`](https://github.com/huggingface/transformers) package, so you can use them in spaCy. The result is convenient access to state-of-the-art transformer architectures, such as BERT, GPT-2, XLNet, etc.",
 								            "github": "explosion/spacy-transformers",
 								            "url": "https://explosion.ai/blog/spacy-transformers",
 								            "pip": "spacy-transformers",
-												Update universe.json [ci skip]

											
										
										
											2019-08-05 15:30:07 +03:00
+								            "category": ["pipeline", "models", "research"],
 								            "code_example": [
 								                "import spacy",
 								                "",
-												Update transformer model details [ci skip]

											
										
										
											2019-10-08 16:39:38 +03:00
+								                "nlp = spacy.load(\"en_trf_bertbaseuncased_lg\")",
-												Update universe.json [ci skip]

											
										
										
											2019-08-05 15:30:07 +03:00
+								                "doc = nlp(\"Apple shares rose on the news. Apple pie is delicious.\")",
 								                "print(doc[0].similarity(doc[7]))",
-												Update transformer model details [ci skip]

											
										
										
											2019-10-08 16:39:38 +03:00
+								                "print(doc._.trf_last_hidden_state.shape)"
-												Update universe.json [ci skip]

											
										
										
											2019-08-05 15:30:07 +03:00
+								            ],
 								            "author": "Explosion",
 								            "author_links": {
 								                "twitter": "explosion_ai",
 								                "github": "explosion",
 								                "website": "https://explosion.ai"
 								            }
-												Update universe.json to include negspacy (#4132)


											
										
										
											2019-08-16 18:48:17 +03:00
+								        },
-												Added RONEC to spaCy Universe (#4151)

* Added RONEC to spaCy Universe

* Added contributor file

* Corrected date from .github/contributors/avramandrei.md

* Convert tabs to spaces

* Remove duplicate keys

Can only have one GitHub link unfortunately

* Also add models category

* Adjust ID

This is used to generate the URL, so a simpler string is better

											
										
										
											2019-08-20 15:46:07 +03:00
+								        {
-												Update universe.json to include negspacy (#4132)


											
										
										
											2019-08-16 18:48:17 +03:00
+								            "id": "negspacy",
 								            "title": "negspaCy",
 								            "slogan": "spaCy pipeline object for negating concepts in text based on the NegEx algorithm.",
 								            "github": "jenojp/negspacy",
 								            "url": "https://github.com/jenojp/negspacy",
 								            "description": "negspacy is a spaCy pipeline component that evaluates whether Named Entities are negated in text. It adds an extension to 'Span' objects.",
 								            "pip": "negspacy",
 								            "category": ["pipeline", "scientific"],
 								            "tags": ["negation", "text-processing"],
-												Auto-format [ci skip]

											
										
										
											2019-08-20 15:46:41 +03:00
+								            "thumb": "https://github.com/jenojp/negspacy/blob/master/docs/thumb.png?raw=true",
 								            "image": "https://github.com/jenojp/negspacy/blob/master/docs/icon.png?raw=true",
-												Update universe.json to include negspacy (#4132)


											
										
										
											2019-08-16 18:48:17 +03:00
+								            "code_example": [
 								                "import spacy",
 								                "from negspacy.negation import Negex",
 								                "",
 								                "nlp = spacy.load(\"en_core_web_sm\")",
 								                "negex = Negex(nlp, ent_types=[\"PERSON\", \"ORG\"])",
 								                "nlp.add_pipe(negex, last=True)",
 								                "",
 								                "doc = nlp(\"She does not like Steve Jobs but likes Apple products.\")",
 								                "for e in doc.ents:",
 								                "    print(e.text, e._.negex)"
 								            ],
 								            "author": "Jeno Pizarro",
 								            "author_links": {
 								                "github": "jenojp",
 								                "twitter": "jenojp"
 								            }
-												Added RONEC to spaCy Universe (#4151)

* Added RONEC to spaCy Universe

* Added contributor file

* Corrected date from .github/contributors/avramandrei.md

* Convert tabs to spaces

* Remove duplicate keys

Can only have one GitHub link unfortunately

* Also add models category

* Adjust ID

This is used to generate the URL, so a simpler string is better

											
										
										
											2019-08-20 15:46:07 +03:00
+								        },
 								        {
 								            "id": "ronec",
 								            "title": "RONEC - Romanian Named Entity Corpus",
 								            "slogan": "Named Entity Recognition corpus for Romanian language.",
 								            "github": "dumitrescustefan/ronec",
 								            "url": "https://github.com/dumitrescustefan/ronec",
 								            "description": "The corpus holds 5127 sentences, annotated with 16 classes, with a total of 26376 annotated entities. The corpus comes in two formats: BRAT and CONLLUP.",
 								            "category": ["standalone", "models"],
 								            "tags": ["ner", "romanian"],
 								            "thumb": "https://raw.githubusercontent.com/dumitrescustefan/ronec/master/res/thumb.png",
 								            "code_example": [
 								                "# to train a new model on ronec",
 								                "python3 convert_spacy.py ronec/conllup/ronec.conllup output",
 								                "python3 -m spacy train ro models output/train_ronec.json output/train_ronec.json -p ent",
 								                "",
 								                "# download the Romanian NER model",
 								                "python -m spacy download ro_ner",
 								                "",
 								                "# load the model and print entities for a simple sentence",
 								                "import spacy",
 								                "",
 								                "nlp = spacy.load(\"ro_ner\")",
 								                "doc = nlp(\"Popescu Ion a fost la Cluj\")",
 								                "",
 								                "for ent in doc.ents:",
 								                "\tprint(ent.text, ent.start_char, ent.end_char, ent.label_)"
 								            ],
 								            "author": "Stefan Daniel Dumitrescu, Andrei-Marius Avram"
-												new universe project - the numeric fused-head (#4192)

* new universe project

* Update website/meta/universe.json

Co-Authored-By: Ines Montani <ines@ines.io>

* Update website/meta/universe.json

Co-Authored-By: Ines Montani <ines@ines.io>

											
										
										
											2019-08-25 18:25:28 +03:00
+								        },
 								        {
 								            "id": "num_fh",
 								            "title": "Numeric Fused-Head",
 								            "slogan": "Numeric Fused-Head Identification and Resolution in English",
 								            "description": "This package provides a wrapper for the Numeric Fused-Head in English. It provides another information layer on numbers that refer to another entity which is not obvious from the syntactic tree.",
 								            "github": "yanaiela/num_fh",
 								            "pip": "num_fh",
 								            "category": ["pipeline", "research"],
 								            "code_example": [
 								                "import spacy",
 								                "from num_fh import NFH",
 								                "nlp = spacy.load('en_core_web_sm')",
 								                "nfh = NFH(nlp)",
 								                "nlp.add_pipe(nfh, first=False)",
 								                "doc = nlp(\"I told you two, that only one of them is the one who will get 2 or 3 icecreams\")",
 								                "",
 								                "assert doc[16]._.is_nfh == True",
 								                "assert doc[18]._.is_nfh == False",
 								                "assert doc[3]._.is_deter_nfh == True",
 								                "assert doc[16]._.is_deter_nfh == False",
-												Fix universe.json [ci skip]

											
										
										
											2019-08-27 13:13:42 +03:00
+								                "assert len(doc._.nfh) == 4"
-												new universe project - the numeric fused-head (#4192)

* new universe project

* Update website/meta/universe.json

Co-Authored-By: Ines Montani <ines@ines.io>

* Update website/meta/universe.json

Co-Authored-By: Ines Montani <ines@ines.io>

											
										
										
											2019-08-25 18:25:28 +03:00
+								            ],
 								            "author": "Yanai Elazar",
 								            "author_links": {
 								                "github": "yanaiela",
 								                "twitter": "yanaiela",
 								                "website": "https://yanaiela.github.io"
 								            }
-												Update universe.json [ci skip]

											
										
										
											2019-08-28 12:59:06 +03:00
+								        },
 								        {
 								            "id": "presidio",
 								            "title": "Presidio",
 								            "slogan": "Context aware, pluggable and customizable data protection and PII data anonymization",
 								            "description": "Presidio *(Origin from Latin praesidium ‘protection, garrison’)* helps to ensure sensitive text is properly managed and governed. It provides fast ***analytics*** and ***anonymization*** for sensitive text such as credit card numbers, names, locations, social security numbers, bitcoin wallets, US phone numbers and financial data. Presidio analyzes the text using predefined or custom recognizers to identify entities, patterns, formats, and checksums with relevant context.",
 								            "url": "https://aka.ms/presidio",
 								            "image": "https://raw.githubusercontent.com/microsoft/presidio/master/docs/assets/before-after.png",
 								            "github": "microsoft/presidio",
 								            "category": ["standalone"],
 								            "thumb": "https://avatars0.githubusercontent.com/u/6154722",
-												Added presidio-research to universe.json (#4950)

* Added presidio-research to universe.json

Added a reference to Presidio Research, the data-science toolbox for Microsoft Presidio.

* Updated url

											
										
										
											2020-02-03 14:57:55 +03:00
+								            "author": "Microsoft",
 								            "author_links": {
 								                "github": "microsoft"
 								            }
 								        },
 								        {
 								            "id": "presidio-research",
 								            "title": "Presidio Research",
 								            "slogan": "Toolbox for developing and evaluating PII detectors, NER models for PII and generating fake PII data",
 								            "description": "This package features data-science related tasks for developing new recognizers for Microsoft Presidio. It is used for the evaluation of the entire system, as well as for evaluating specific PII recognizers or PII detection models. Anyone interested in evaluating an existing Microsoft Presidio instance, a specific PII recognizer or to develop new models or logic for detecting PII could leverage the preexisting work in this package. Additionally, anyone interested in generating new data based on previous datasets (e.g. to increase the coverage of entity values) for Named Entity Recognition models could leverage the data generator contained in this package.",
 								            "url": "https://aka.ms/presidio-research",
 								            "github": "microsoft/presidio-research",
 								            "category": ["standalone"],
 								            "thumb": "https://avatars0.githubusercontent.com/u/6154722",
-												Update universe.json [ci skip]

											
										
										
											2019-08-28 12:59:06 +03:00
+								            "author": "Microsoft",
 								            "author_links": {
 								                "github": "microsoft"
 								            }
-												Add cookiecutter-spacy-fastapi to spacy universe (#4498)


											
										
										
											2019-10-22 15:50:40 +03:00
+								        },
-												✨  project: pySBD - Python Sentence Boundary Disambiguation (#4455)

* ✨  project: pySBD - Python Sentence Boundary Disambiguation

* 📝  Update links and description

* 🐛  Fix missing comma

* Update universe.json

pysbd as a spacy component through entrypoints

* 🚨  Fix universe.json

* 📝  Update code_example

											
										
										
											2019-10-30 14:13:29 +03:00
+								        {
 								            "id": "python-sentence-boundary-disambiguation",
 								            "title": "pySBD - python Sentence Boundary Disambiguation",
-												Update universe.json [ci skip]

											
										
										
											2019-10-30 15:29:00 +03:00
+								            "slogan": "Rule-based sentence boundary detection that works out-of-the-box",
-												✨  project: pySBD - Python Sentence Boundary Disambiguation (#4455)

* ✨  project: pySBD - Python Sentence Boundary Disambiguation

* 📝  Update links and description

* 🐛  Fix missing comma

* Update universe.json

pysbd as a spacy component through entrypoints

* 🚨  Fix universe.json

* 📝  Update code_example

											
										
										
											2019-10-30 14:13:29 +03:00
+								            "github": "nipunsadvilkar/pySBD",
-												Update universe.json [ci skip]

											
										
										
											2019-10-30 15:29:00 +03:00
+								            "description": "pySBD is a 'real-world' sentence segmenter which extracts reasonable sentences when the format and domain of the input text are unknown. It is a rules-based algorithm based on [The Golden Rules](https://s3.amazonaws.com/tm-town-nlp-resources/golden_rules.txt) - a set of tests to check the accuracy of a segmenter with regard to edge-case scenarios, developed by the [TM-Town](https://www.tm-town.com/) dev team. pySBD is a Python port of the Ruby gem [Pragmatic Segmenter](https://github.com/diasks2/pragmatic_segmenter).",
-												✨  project: pySBD - Python Sentence Boundary Disambiguation (#4455)

* ✨  project: pySBD - Python Sentence Boundary Disambiguation

* 📝  Update links and description

* 🐛  Fix missing comma

* Update universe.json

pysbd as a spacy component through entrypoints

* 🚨  Fix universe.json

* 📝  Update code_example

											
										
										
											2019-10-30 14:13:29 +03:00
+								            "pip": "pysbd",
 								            "category": ["scientific"],
 								            "tags": ["sentence segmentation"],
 								            "code_example": [
 								                "import spacy",
 								                "from pysbd.util import PySBDFactory",
 								                "",
 								                "nlp = spacy.blank('en')",
 								                "nlp.add_pipe(PySBDFactory(nlp))",
 								                "",
 								                "doc = nlp('My name is Jonas E. Smith. Please turn to p. 55.')",
 								                "print(list(doc.sents))",
 								                "# [My name is Jonas E. Smith., Please turn to p. 55.]"
 								            ],
 								            "author": "Nipun Sadvilkar",
 								            "author_links": {
 								                "twitter": "nipunsadvilkar",
 								                "github": "nipunsadvilkar",
 								                "website": "https://nipunsadvilkar.github.io"
-												Update universe.json [ci skip]

											
										
										
											2019-10-30 15:29:00 +03:00
+								            }
-												✨  project: pySBD - Python Sentence Boundary Disambiguation (#4455)

* ✨  project: pySBD - Python Sentence Boundary Disambiguation

* 📝  Update links and description

* 🐛  Fix missing comma

* Update universe.json

pysbd as a spacy component through entrypoints

* 🚨  Fix universe.json

* 📝  Update code_example

											
										
										
											2019-10-30 14:13:29 +03:00
+								        },
-												Add cookiecutter-spacy-fastapi to spacy universe (#4498)


											
										
										
											2019-10-22 15:50:40 +03:00
+								        {
 								            "id": "cookiecutter-spacy-fastapi",
 								            "title": "cookiecutter-spacy-fastapi",
 								            "slogan": "Docker-based cookiecutter for easy spaCy APIs using FastAPI",
-												Update universe.json [ci skip]

											
										
										
											2019-10-22 15:54:47 +03:00
+								            "description": "Docker-based cookiecutter for easy spaCy APIs using FastAPI. The default endpoints expect batch requests with a list of Records in the Azure Search Cognitive Skill format. So out of the box, this cookiecutter can be setup as a Custom Cognitive Skill. For more on Azure Search and Cognitive Skills [see this page](https://docs.microsoft.com/en-us/azure/search/cognitive-search-custom-skill-interface).",
-												Add cookiecutter-spacy-fastapi to spacy universe (#4498)


											
										
										
											2019-10-22 15:50:40 +03:00
+								            "url": "https://github.com/microsoft/cookiecutter-spacy-fastapi",
 								            "image": "https://raw.githubusercontent.com/microsoft/cookiecutter-spacy-fastapi/master/images/cookiecutter-docs.png",
 								            "github": "microsoft/cookiecutter-spacy-fastapi",
 								            "category": ["apis"],
 								            "thumb": "https://avatars0.githubusercontent.com/u/6154722",
 								            "author": "Microsoft",
 								            "author_links": {
 								                "github": "microsoft"
 								            }
-												add dframcy to universe.json (#4580)


											
										
										
											2019-11-04 15:53:23 +03:00
+								        },
 								        {
 								            "id": "dframcy",
 								            "title": "Dframcy",
 								            "slogan": "Dataframe Integration with spaCy NLP",
 								            "github": "yash1994/dframcy",
 								            "description": "DframCy is a light-weight utility module to integrate Pandas Dataframe to spaCy's linguistic annotation and training tasks.",
 								            "pip": "dframcy",
-												Update universe.json [ci skip]

											
										
										
											2019-11-04 15:55:55 +03:00
+								            "category": ["pipeline", "training"],
 								            "tags": ["pandas"],
-												add dframcy to universe.json (#4580)


											
										
										
											2019-11-04 15:53:23 +03:00
+								            "code_example": [
-												Update universe.json [ci skip]

											
										
										
											2019-11-04 15:55:55 +03:00
+								                "import spacy",
 								                "from dframcy import DframCy",
 								                "",
 								                "nlp = spacy.load('en_core_web_sm')",
 								                "dframcy = DframCy(nlp)",
 								                "doc = dframcy.nlp(u'Apple is looking at buying U.K. startup for $1 billion')",
-												add dframcy to universe.json (#4580)


											
										
										
											2019-11-04 15:53:23 +03:00
+								                "annotation_dataframe = dframcy.to_dataframe(doc)"
 								            ],
 								            "author": "Yash Patadia",
 								            "author_links": {
 								                "twitter": "PatadiaYash",
 								                "github": "yash1994"
 								            }
-												Submitting `PyTextRank` for inclusion in the spaCy uniVerse (#4942)

* submitting PyTextRank for consideration of including in the spaCy uniVerse

* including SCA

											
										
										
											2020-01-28 13:37:54 +03:00
+								        },
 								        {
 								            "id": "spacy-pytextrank",
 								            "title": "PyTextRank",
 								            "slogan": "Py impl of TextRank for lightweight phrase extraction",
 								            "description": "An implementation of TextRank in Python for use in spaCy pipelines which provides fast, effective phrase extraction from texts, along with extractive summarization. The graph algorithm works independent of a specific natural language and does not require domain knowledge. See (Mihalcea 2004) https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf",
 								            "github": "DerwenAI/pytextrank",
 								            "pip": "pytextrank",
 								            "code_example": [
 								                "import spacy",
 								                "import pytextrank",
 								                "",
 								                "nlp = spacy.load('en_core_web_sm')",
 								                "",
 								                "tr = pytextrank.TextRank()",
 								                "nlp.add_pipe(tr.PipelineComponent, name='textrank', last=True)",
 								                "",
 								                "text = 'Compatibility of systems of linear constraints over the set of natural numbers. Criteria of compatibility of a system of linear Diophantine equations, strict inequations, and nonstrict inequations are considered.'",
 								                "doc = nlp(text)",
 								                "",
 								                "# examine the top-ranked phrases in the document",
 								                "for p in doc._.phrases:",
 								                "    print('{:.4f} {:5d}  {}'.format(p.rank, p.count, p.text))",
 								                "    print(p.chunks)"
 								            ],
 								            "code_language": "python",
 								            "url": "https://github.com/DerwenAI/pytextrank/wiki",
 								            "thumb": "https://memegenerator.net/img/instances/66942896.jpg",
 								            "image": "https://memegenerator.net/img/instances/66942896.jpg",
 								            "author": "Paco Nathan",
 								            "author_links": {
 								                "twitter": "pacoid",
 								                "github": "ceteri",
 								                "website": "https://derwen.ai/paco"
 								            },
 								            "category": ["pipeline"],
 								            "tags": ["phrase extraction", "ner", "summarization", "graph algorithms", "textrank"]
-												add spacy_syllables to universe + sign contributor agreement

											
										
										
											2020-03-13 19:58:29 +03:00
+								        },
 								        {
 								            "id": "spacy_syllables",
 								            "title": "Spacy Syllables",
 								            "slogan": "Multilingual syllable annotations",
 								            "description": "Spacy Syllables is a pipeline component that adds multilingual syllable annotations to Tokens. It uses Pyphen under the hood and has support for a long list of languages.",
 								            "github": "sloev/spacy-syllables",
 								            "pip": "spacy_syllables",
 								            "code_example": [
 								                "import spacy",
 								                "from spacy_syllables import SpacySyllables",
 								                "",
 								                "nlp = spacy.load('en_core_web_sm')",
 								                "syllables = SpacySyllables(nlp)",
 								                "nlp.add_pipe(syllables, after='tagger')",
 								                "",
 								                "doc = nlp('terribly long')",
 								                "",
 								                "data = [",
 								                "    (token.text, token._.syllables, token._.syllables_count)",
 								                "    for token in doc",
 								                "]",
 								                "",
 								                "assert data == [",
 								                "    ('terribly', ['ter', 'ri', 'bly'], 3),",
 								                "    ('long', ['long'], 1)",
 								                "]"
 								            ],
 								            "thumb": "https://raw.githubusercontent.com/sloev/spacy-syllables/master/logo.png",
 								            "author": "Johannes Valbjørn",
 								            "author_links": {
 								                "github": "sloev"
 								            },
 								            "category": ["pipeline"],
 								            "tags": ["syllables", "multilingual"]
-												add gobbli to spacy-universe 🥳

											
										
										
											2020-03-17 15:30:29 +03:00
+								        },
 								        {
 								            "id": "gobbli",
 								            "title": "gobbli",
 								            "slogan": "Deep learning for text classification doesn't have to be scary",
-												Update universe.json [ci skip]

											
										
										
											2020-03-18 00:19:29 +03:00
+								            "description": "gobbli is a Python library which wraps several modern deep learning models in a uniform interface that makes it easy to evaluate feasibility and conduct analyses. It leverages the abstractive powers of Docker to hide nearly all dependency management and functional differences between models from the user. It also contains an interactive app for exploring text data and evaluating classification models. spaCy's base text classification models, as well as models integrated from `spacy-transformers`, are available in the collection of classification models. In addition, spaCy is used for data augmentation and document embeddings.",
-												add gobbli to spacy-universe 🥳

											
										
										
											2020-03-17 15:30:29 +03:00
+								            "url": "https://github.com/rtiinternational/gobbli",
 								            "github": "rtiinternational/gobbli",
 								            "pip": "gobbli",
 								            "thumb": "https://i.postimg.cc/NGpzhrdr/gobbli-lg.png",
 								            "code_example": [
 								                "from gobbli.io import PredictInput, TrainInput",
 								                "from gobbli.model.bert import BERT",
 								                "",
 								                "train_input = TrainInput(",
 								                "    X_train=['This is a training document.', 'This is another training document.'],",
 								                "    y_train=['0', '1'],",
 								                "    X_valid=['This is a validation sentence.', 'This is another validation sentence.'],",
 								                "    y_valid=['1', '0'],",
 								                ")",
 								                "",
 								                "clf = BERT()",
 								                "",
 								                "# Set up classifier resources -- Docker image, etc.",
 								                "clf.build()",
 								                "",
 								                "# Train model",
 								                "train_output = clf.train(train_input)",
 								                "",
 								                "predict_input = PredictInput(",
 								                "    X=['Which class is this document?'],",
 								                "    labels=train_output.labels,",
 								                "    checkpoint=train_output.checkpoint,",
 								                ")",
 								                "",
 								                "predict_output = clf.predict(predict_input)"
 								            ],
 								            "category": ["standalone"]
-												Add spacy_fastlang to universe (#5271)

* Add spacy_fastlang to universe

* Sign SCA
											
										
										
											2020-04-15 14:50:46 +03:00
+								        },
 								        {
 								            "id": "spacy_fastlang",
 								            "title": "Spacy FastLang",
 								            "slogan": "Language detection done fast",
 								            "description": "Fast language detection using FastText and Spacy.",
 								            "github": "thomasthiebaud/spacy-fastlang",
 								            "pip": "spacy_fastlang",
 								            "code_example": [
 								                "import spacy",
 								                "from spacy_fastlang import LanguageDetector",
 								                "",
 								                "nlp = spacy.load('en_core_web_sm')",
 								                "nlp.add_pipe(LanguageDetector())",
-												Remove the nested quote
											
										
										
											2020-05-23 18:58:19 +03:00
+								                "doc = nlp('Life is like a box of chocolates. You never know what you are gonna get.')",
-												Add spacy_fastlang to universe (#5271)

* Add spacy_fastlang to universe

* Sign SCA
											
										
										
											2020-04-15 14:50:46 +03:00
+								                "",
 								                "assert doc._.language == 'en'",
 								                "assert doc._.language_score >= 0.8"
 								            ],
 								            "author": "Thomas Thiebaud",
 								            "author_links": {
 								                "github": "thomasthiebaud"
 								            },
 								            "category": ["pipeline"]
-												Add mlflow to spaCy universe (#5352)

* Add mlflow to universe

* Use mlflow black logo
											
										
										
											2020-04-29 11:18:03 +03:00
+								        },
 								        {
 								            "id": "mlflow",
 								            "title": "MLflow",
 								            "slogan": "An open source platform for the machine learning lifecycle",
 								            "description": "MLflow is an open source platform to manage the ML lifecycle, including experimentation, reproducibility, deployment, and a central model registry. MLflow currently offers four components: Tracking, Projects, Models and Registry.",
 								            "github": "mlflow/mlflow",
 								            "pip": "mlflow",
 								            "thumb": "https://www.mlflow.org/docs/latest/_static/MLflow-logo-final-black.png",
 								            "image": "",
 								            "url": "https://mlflow.org/",
 								            "author": "Databricks",
 								            "author_links": {
 								                "github": "databricks",
 								                "twitter": "databricks",
 								                "website": "https://databricks.com/"
 								            },
 								            "category": ["standalone", "apis"],
 								            "code_example": [
 								                "import mlflow",
 								                "import mlflow.spacy",
 								                "import spacy",
 								                "",
 								                "# MLflow Tracking",
 								                "nlp = spacy.load('my_best_model_path/output/model-best')",
 								                "with mlflow.start_run(run_name='Spacy'):",
 								                "    mlflow.set_tag('model_flavor', 'spacy')",
 								                "    mlflow.spacy.log_model(spacy_model=nlp, artifact_path='model')",
 								                "    mlflow.log_metric('accuracy', 0.72)",
 								                "    my_run_id = mlflow.active_run().info.run_id",
 								                "",
 								                "",
 								                "# MLflow Models",
 								                "model_uri = f'runs:/{my_run_id}/model'",
 								                "nlp2 = mlflow.spacy.load_model(model_uri=model_uri)"
 								            ]
-												Update universe.json
											
										
										
											2020-05-20 06:12:21 +03:00
+								        },
 								        {
 								            "id": "pyate",
 								            "title": "PyATE",
 								            "slogan": "Python Automated Term Extraction",
 								            "description": "PyATE is a term extraction library written in Python using Spacy POS tagging with Basic, Combo Basic, C-Value, TermExtractor, and Weirdness.",
 								            "github": "kevinlu1248/pyate",
 								            "pip": "pyate",
 								            "code_example": [
-												Changed pyate code example in universe.json
											
										
										
											2020-05-20 19:11:32 +03:00
+								                "import spacy",
-												Update universe and display of videos [ci skip]

											
										
										
											2020-05-21 22:54:23 +03:00
+								                "from pyate.term_extraction_pipeline import TermExtractionPipeline",
 								                "",
 								                "nlp = spacy.load('en_core_web_sm')",
 								                "nlp.add_pipe(TermExtractionPipeline())",
 								                "# source: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1994795/",
 								                "string = 'Central to the development of cancer are genetic changes that endow these “cancer cells” with many of the hallmarks of cancer, such as self-sufficient growth and resistance to anti-growth and pro-death signals. However, while the genetic changes that occur within cancer cells themselves, such as activated oncogenes or dysfunctional tumor suppressors, are responsible for many aspects of cancer development, they are not sufficient. Tumor promotion and progression are dependent on ancillary processes provided by cells of the tumor environment but that are not necessarily cancerous themselves. Inflammation has long been associated with the development of cancer. This review will discuss the reflexive relationship between cancer and inflammation with particular focus on how considering the role of inflammation in physiologic processes such as the maintenance of tissue homeostasis and repair may provide a logical framework for understanding the connection between the inflammatory response and cancer.'",
 								                "",
 								                "doc = nlp(string)",
 								                "print(doc._.combo_basic.sort_values(ascending=False).head(5))",
 								                "\"\"\"",
 								                "dysfunctional tumor                1.443147",
 								                "tumor suppressors                  1.443147",
 								                "genetic changes                    1.386294",
 								                "cancer cells                       1.386294",
 								                "dysfunctional tumor suppressors    1.298612",
 								                "\"\"\""
-												Update universe.json
											
										
										
											2020-05-20 06:12:21 +03:00
+								            ],
 								            "code_language": "python",
 								            "url": "https://github.com/kevinlu1248/pyate",
 								            "author": "Kevin Lu",
 								            "author_links": {
 								                "twitter": "kevinlu1248",
 								                "github": "kevinlu1248",
 								                "website": "https://github.com/kevinlu1248/pyate"
 								            },
 								            "category": ["pipeline", "research"],
 								            "tags": ["term_extraction"]
-												update spacy universe with my project (#5497)

* added contextualSpellCheck in spacy universe meta

* removed extra formatting by code

* updated with permanent links

* run json linter used by spacy

* filled SCA

* updated the description
											
										
										
											2020-05-25 12:30:23 +03:00
+								        },
 								        {
 								            "id": "contextualSpellCheck",
 								            "title": "Contextual Spell Check",
 								            "slogan": "Contextual spell correction using BERT (bidirectional representations)",
 								            "description": "This package currently focuses on Out of Vocabulary (OOV) word or non-word error (NWE) correction using BERT model. The idea of using BERT was to use the context when correcting NWE. In the coming days, I would like to focus on RWE and optimising the package by implementing it in cython.",
 								            "github": "R1j1t/contextualSpellCheck",
 								            "pip": "contextualSpellCheck",
 								            "code_example": [
 								                "import spacy",
 								                "import contextualSpellCheck",
 								                "",
 								                "nlp = spacy.load('en')",
 								                "contextualSpellCheck.add_to_pipe(nlp)",
 								                "doc = nlp('Income was $9.4 milion compared to the prior year of $2.7 milion.')",
 								                "",
 								                "print(doc._.performed_spellCheck) #Should be True",
 								                "print(doc._.outcome_spellCheck) #Income was $9.4 million compared to the prior year of $2.7 million."
 								            ],
 								            "code_language": "python",
 								            "url": "https://github.com/R1j1t/contextualSpellCheck",
 								            "thumb": "https://user-images.githubusercontent.com/22280243/82760949-98e68480-9e14-11ea-952e-4738620fd9e3.png",
 								            "image": "https://user-images.githubusercontent.com/22280243/82138959-2852cd00-9842-11ea-918a-49b2a7873ef6.png",
 								            "author": "Rajat Goel",
 								            "author_links": {
 								                "github": "r1j1t",
 								                "website": "https://github.com/R1j1t"
 								            },
 								            "category": ["pipeline", "conversational", "research"],
 								            "tags": ["spell check", "correction", "preprocessing", "translation"]
-												Add texthero to universe.json (#5716)

* Add texthero to universe.json

* Add spaCy contributor Agreement
											
										
										
											2020-07-07 21:54:22 +03:00
+								        },
 								        {
 								            "id": "texthero",
 								            "title": "Texthero",
 								            "slogan": "Text preprocessing, representation and visualization from zero to hero.",
 								            "description": "Texthero is a python package to work with text data efficiently. It empowers NLP developers with a tool to quickly understand any text-based dataset and it provides a solid pipeline to clean and represent text data, from zero to hero.",
 								            "github": "jbesomi/texthero",
 								            "pip": "texthero",
 								            "code_example": [
 								                "import texthero as hero",
 								                "import pandas as pd",
 								                "",
 								                "df = pd.read_csv('https://github.com/jbesomi/texthero/raw/master/dataset/bbcsport.csv')",
 								                "df['named_entities'] = hero.named_entities(df['text'])",
 								                "df.head()"
 								            ],
 								            "code_language": "python",
 								            "url": "https://texthero.org",
 								            "thumb": "https://texthero.org/img/T.png",
 								            "image": "https://texthero.org/docs/assets/texthero.png",
 								            "author": "Jonathan Besomi",
 								            "author_links": {
 								                "github": "jbesomi",
 								                "website": "https://besomi.ai"
 								            },
-												Fix and update universe.json [ci skip]

											
										
										
											2020-07-07 22:12:28 +03:00
+								            "category": ["standalone"]
-												Add VA COVID-19 NLP project to spaCy Universe (#5777)

* Update universe.json

Add cov-bsv to "resources"

* Update universe.json

* add contributor agreement
											
										
										
											2020-07-19 14:35:31 +03:00
+								        },
 								        {
 								            "id": "cov-bsv",
 								            "title": "VA COVID-19 NLP BSV",
 								            "slogan": "spaCy pipeline for COVID-19 surveillance.",
 								            "github": "abchapman93/VA_COVID-19_NLP_BSV",
 								            "description": "A spaCy rule-based pipeline for identifying positive cases of COVID-19 from clinical text. A version of this system was deployed as part of the US Department of Veterans Affairs biosurveillance response to COVID-19.",
 								            "pip": "cov-bsv",
 								            "code_example": [
 								              "import cov_bsv",
 								              "",
 								              "nlp = cov_bsv.load()",
 								              "text = 'Pt tested for COVID-19. His wife was recently diagnosed with novel coronavirus. SARS-COV-2: Detected'",
 								              "doc = nlp(text)",
 								              "",
 								              "print(doc.ents)",
 								              "print(doc._.cov_classification)",
 								              "cov_bsv.visualize_doc(doc)"
 								            ],
 								            "category": ["pipeline", "standalone", "biomedical", "scientific"],
 								            "tags": ["clinical", "epidemiology", "covid-19", "surveillance"],
 								            "author": "Alec Chapman",
 								            "author_links": {
 								                "github": "abchapman93"
 								            }
-												Fix universe.json [ci skip]

											
										
										
											2019-08-27 13:13:42 +03:00
+								        }
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								    ],
-												Adjust wording and formatting [ci skip]

											
										
										
											2019-05-03 13:00:31 +03:00
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								    "categories": [
 								        {
 								            "label": "Projects",
 								            "items": [
 								                {
 								                    "id": "pipeline",
 								                    "title": "Pipeline",
 								                    "description": "Custom pipeline components and extensions"
 								                },
 								                {
 								                    "id": "training",
 								                    "title": "Training",
 								                    "description": "Helpers and toolkits for training spaCy models"
 								                },
 								                {
 								                    "id": "conversational",
 								                    "title": "Conversational",
 								                    "description": "Frameworks and utilities for working with conversational text, e.g. for chat bots"
 								                },
 								                {
 								                    "id": "research",
 								                    "title": "Research",
 								                    "description": "Frameworks and utilities for developing better NLP models, especially using neural networks"
 								                },
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								                {
 								                    "id": "scientific",
 								                    "title": "Scientific",
 								                    "description": "Frameworks and utilities for scientific text processing"
 								                },
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								                {
 								                    "id": "visualizers",
 								                    "title": "Visualizers",
 								                    "description": "Demos and tools to visualize NLP annotations or systems"
 								                },
 								                {
 								                    "id": "apis",
 								                    "title": "Containers & APIs",
 								                    "description": "Infrastructure tools for managing or deploying spaCy"
 								                },
 								                {
 								                    "id": "nonpython",
 								                    "title": "Non-Python",
 								                    "description": "Wrappers, bindings and implementations in other programming languages"
 								                },
 								                {
 								                    "id": "standalone",
 								                    "title": "Standalone",
 								                    "description": "Self-contained libraries or tools that use spaCy under the hood"
-												Update universe [ci skip]

											
										
										
											2019-06-02 13:58:12 +03:00
+								                },
 								                {
 								                    "id": "models",
 								                    "title": "Models",
-												Use consistent spelling

											
										
										
											2019-10-02 11:37:39 +03:00
+								                    "description": "Third-party pretrained models for different languages and domains"
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								                }
 								            ]
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								        },
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								        {
 								            "label": "Education",
 								            "items": [
 								                {
 								                    "id": "books",
 								                    "title": "Books",
 								                    "description": "Books about or featuring spaCy"
 								                },
 								                {
 								                    "id": "courses",
 								                    "title": "Courses",
 								                    "description": "Online courses and interactive tutorials"
 								                },
 								                {
 								                    "id": "videos",
 								                    "title": "Videos",
 								                    "description": "Talks and tutorials in video format"
 								                },
 								                {
 								                    "id": "podcasts",
 								                    "title": "Podcasts",
 								                    "description": "Episodes about spaCy or interviews with the spaCy team"
 								                }
 								            ]
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								        }
-												💫 Update website (#3285)

<!--- Provide a general summary of your changes in the title. -->

## Description

The new website is implemented using [Gatsby](https://www.gatsbyjs.org) with [Remark](https://github.com/remarkjs/remark) and [MDX](https://mdxjs.com/). This allows authoring content in **straightforward Markdown** without the usual limitations. Standard elements can be overwritten with powerful [React](http://reactjs.org/) components and wherever Markdown syntax isn't enough, JSX components can be used. Hopefully, this update will also make it much easier to contribute to the docs. Once this PR is merged, I'll implement auto-deployment via [Netlify](https://netlify.com) on a specific branch (to avoid building the website on every PR). There's a bunch of other cool stuff that the new setup will allow us to do – including writing front-end tests, service workers, offline support, implementing a search and so on.

This PR also includes various new docs pages and content.
Resolves #3270. Resolves #3222. Resolves #2947. Resolves #2837.


### Types of change
enhancement

## Checklist
<!--- Before you submit the PR, go over this checklist and make sure you can
tick off all the boxes. [] -> [x] -->
- [x] I have submitted the spaCy Contributor Agreement.
- [x] I ran the tests, and all new and existing tests passed.
- [x] My changes don't require a change to the documentation, or if they do, I've added all required information.

											
										
										
											2019-02-17 21:31:19 +03:00
+								    ]
-												💫 Interactive code examples, spaCy Universe and various docs improvements (#2274)

* Integrate Python kernel via Binder

* Add live model test for languages with examples

* Update docs and code examples

* Adjust margin (if not bootstrapped)

* Add binder version to global config

* Update terminal and executable code mixins

* Pass attributes through infobox and section

* Hide v-cloak

* Fix example

* Take out model comparison for now

* Add meta text for compat

* Remove chart.js dependency

* Tidy up and simplify JS and port big components over to Vue

* Remove chartjs example

* Add Twitter icon

* Add purple stylesheet option

* Add utility for hand cursor (special cases only)

* Add transition classes

* Add small option for section

* Add thumb object for small round thumbnail images

* Allow unset code block language via "none" value

(workaround to still allow unset language to default to DEFAULT_SYNTAX)

* Pass through attributes

* Add syntax highlighting definitions for Julia, R and Docker

* Add website icon

* Remove user survey from navigation

* Don't hide GitHub icon on small screens

* Make top navigation scrollable on small screens

* Remove old resources page and references to it

* Add Universe

* Add helper functions for better page URL and title

* Update site description

* Increment versions

* Update preview images

* Update mentions of resources

* Fix image

* Fix social images

* Fix problem with cover sizing and floats

* Add divider and move badges into heading

* Add docstrings

* Reference converting section

* Add section on converting word vectors

* Move converting section to custom section and fix formatting

* Remove old fastText example

* Move extensions content to own section

Keep weird ID to not break permalinks for now (we don't want to rewrite URLs if not absolutely necessary)

* Use better component example and add factories section

* Add note on larger model

* Use better example for non-vector

* Remove similarity in context section

Only works via small models with tensors so has always been kind of confusing

* Add note on init-model command

* Fix lightning tour examples and make excutable if possible

* Add spacy train CLI section to train

* Fix formatting and add video

* Fix formatting

* Fix textcat example description (resolves #2246)

* Add dummy file to try resolve conflict

* Delete dummy file

* Tidy up [ci skip]

* Ensure sufficient height of loading container

* Add loading animation to universe

* Update Thebelab build and use better startup message

* Fix asset versioning

* Fix typo [ci skip]

* Add note on project idea label

											
										
										
											2018-04-29 03:06:46 +03:00
+								}