//- spaCy/website/index.jade

//- 💫 LANDING PAGE

include _includes/_mixins

//- Hero header: a three-line page title followed by an "in Python" label.
//- The +landing-header mixin is not defined in this file (presumably it comes
//- from the _includes/_mixins include — TODO confirm).
+landing-header
    //- Each #[br] is an inline tag interpolation that inserts a line break
    //- after the first and second title lines.
    h1.c-landing__title.u-heading-0
        | Industrial-Strength#[br]
        | Natural Language#[br]
        | Processing

    //- Secondary heading that only wraps the "in Python" text label.
    h2.c-landing__title.o-block.u-heading-3
        span.u-text-label.u-text-label--light in Python

//- Three cards laid out as "third" grid columns. Every card has the same
//- shape: a level-3 heading, one paragraph of copy and a "primary" button.
//- The +grid, +grid-col, +h and +button mixins are defined outside this file.
//- NOTE(review): the meaning of the second +button argument (true) is not
//- visible here — check the mixin definition before changing it.
+grid.o-content.c-landing__blocks
    //- Card 1: speed pitch, links to /usage/facts-figures.
    +grid-col("third").c-landing__card.o-card.o-grid.o-grid--space
        +h(3) Fastest in the world
        p
            |  spaCy excels at large-scale information extraction tasks.
            |  It's written from the ground up in carefully memory-managed
            |  Cython. Independent research has confirmed that spaCy is
            |  the fastest in the world.  If your application needs to
            |  process entire web dumps, spaCy is the library you want to
            |  be using.

        +button("/usage/facts-figures", true, "primary")
            |  Facts & figures

    //- Card 2: productivity pitch, links to /usage.
    +grid-col("third").c-landing__card.o-card.o-grid.o-grid--space
        +h(3) Get things done
        p
            |  spaCy is designed to help you do real work — to build real
            |  products, or gather real insights. The library respects
            |  your time, and tries to avoid wasting it. It's easy to
            |  install, and its API is simple and productive. We like to
            |  think of spaCy as the Ruby on Rails of Natural Language
            |  Processing.

        +button("/usage", true, "primary")
            |  Get started

    //- Card 3: deep learning pitch, links to /usage/training.
    +grid-col("third").c-landing__card.o-card.o-grid.o-grid--space
        +h(3) Deep learning
        p
            |  spaCy is the best way to prepare text for deep learning.
            |  It interoperates seamlessly with TensorFlow, PyTorch,
            |  scikit-learn, Gensim and the rest of Python's awesome AI
            |  ecosystem. With spaCy, you can easily construct linguistically
            |  sophisticated statistical models for a variety of NLP problems.

        +button("/usage/training", true, "primary")
            |  Read more

.o-content
    +grid
        //- Left column ("two-thirds"): Python sample handed to the +code-exec mixin.
        +grid-col("two-thirds")
            //- The trailing dot after the mixin call turns everything indented
            //- below it into a literal text block, so the Python source is passed
            //- through verbatim. Do not add Jade comments inside that block: they
            //- would become part of the sample text.
            //- NOTE(review): the first argument looks like the widget title and the
            //- second is an opaque flag — confirm against the mixin definition.
            +code-exec("Edit the code & try spaCy", true).
                # pip install spacy
                # python -m spacy download en_core_web_sm

                import spacy

                # Load English tokenizer, tagger, parser, NER and word vectors
                nlp = spacy.load('en_core_web_sm')

                # Process whole documents
                text = (u"When Sebastian Thrun started working on self-driving cars at "
                        u"Google in 2007, few people outside of the company took him "
                        u"seriously. “I can tell you very senior CEOs of major American "
                        u"car companies would shake my hand and turn away because I wasn’t "
                        u"worth talking to,” said Thrun, now the co-founder and CEO of "
                        u"online higher education startup Udacity, in an interview with "
                        u"Recode earlier this week.")
                doc = nlp(text)

                # Find named entities, phrases and concepts
                for entity in doc.ents:
                    print(entity.text, entity.label_)

                # Determine semantic similarities
                doc1 = nlp(u"my fries were super gross")
                doc2 = nlp(u"such disgusting fries")
                similarity = doc1.similarity(doc2)
                print(doc1.text, doc2.text, similarity)

        +grid-col("third")
            +h(2) Features
            +list
                +item Non-destructive #[strong tokenization]
                +item #[strong Named entity] recognition
                +item Support for #[strong #{LANG_COUNT}+ languages]
                +item #[strong #{MODEL_COUNT} statistical models] for #{MODEL_LANG_COUNT} languages
                +item Pre-trained #[strong word vectors]
                +item Easy #[strong deep learning] integration
                +item Part-of-speech tagging
                +item Labelled dependency parsing
                +item Syntax-driven sentence segmentation
                +item Built-in #[strong visualizers] for syntax and NER
                +item Convenient string-to-hash mapping
                +item Export to numpy data arrays
                +item Efficient binary serialization
                +item Easy #[strong model packaging] and deployment
                +item State-of-the-art speed
                +item Robust, rigorously evaluated accuracy

+landing-banner("Convolutional neural network models", "New in v2.0")
    p
        |  spaCy v2.0 features new neural models for #[strong tagging],
        |  #[strong parsing] and #[strong entity recognition]. The models have
        |  been designed and implemented from scratch specifically for spaCy, to
        |  give you an unmatched balance of speed, size and accuracy. A novel
        |  bloom embedding strategy with subword features is used to support
        |  huge vocabularies in tiny tables. Convolutional layers with residual
        |  connections, layer normalization and maxout non-linearity are used,
        |  giving much better efficiency than the standard BiLSTM solution.
        |  Finally, the parser and NER use an imitation learning objective to
        |  deliver accuracy in line with the latest research systems,
        |  even when evaluated from raw text. With these innovations, spaCy
        |  v2.0's models are #[strong 10&times; smaller],
        |  #[strong 20% more accurate], and #[strong even cheaper to run] than
        |  the previous generation.

    .o-block-small.u-text-right
        +button("/models", true, "secondary-light") Download models

//- Two logo rows rendered by the +landing-logos mixin: "spaCy is trusted by"
//- and "Featured on". `logos` and `features` are not defined in this file;
//- presumably they are supplied by the site's data/config — TODO confirm.
+landing-logos("spaCy is trusted by", logos)
    //- Button passed as the mixin's block content. gh() is defined elsewhere;
    //- presumably it returns the project's GitHub URL, to which "/stargazers"
    //- is appended — verify against the helper.
    +button(gh("spacy") + "/stargazers", false, "secondary", "small")
        |  and many more

//- Second row has no block content and adds the .o-block-small class.
+landing-logos("Featured on", features).o-block-small

//- Promotional banner for Prodigy: one paragraph of copy followed by a
//- right-aligned button pointing at https://prodi.gy.
//- NOTE(review): the two string arguments look like the banner title and a
//- small label — confirm against the +landing-banner mixin definition.
+landing-banner("Prodigy: Radically efficient machine teaching", "From the makers of spaCy")
    p
        |  Prodigy is an #[strong annotation tool] so efficient that data scientists can
        |  do the annotation themselves, enabling a new level of rapid
        |  iteration. Whether you're working on entity recognition, intent
        |  detection or image classification, Prodigy can help you
        |  #[strong train and evaluate] your models faster. Stream in your own examples or
        |  real-world data from live APIs, update your model in real-time and
        |  chain models together to build more complex systems.

    //- Call-to-action row; .u-text-right is the class applied for alignment.
    .o-block-small.u-text-right
        +button("https://prodi.gy", true, "secondary-light") Try it out

//- Benchmarks section: two "half" columns. The left column holds the summary
//- text and a link to the detailed figures; the right column pulls in a
//- partial from usage/_facts-figures.
.o-content
    +grid
        +grid-col("half")
            +h(2) Benchmarks

            //- The citation link is an inline mixin call (#[+a(...) text])
            //- embedded in the piped text.
            p
                |  In 2015, independent researchers from Emory University and
                |  Yahoo! Labs showed that spaCy offered the
                |  #[strong fastest syntactic parser in the world] and that its
                |  accuracy was #[strong within 1% of the best] available
                |  (#[+a("https://aclweb.org/anthology/P/P15/P15-1038.pdf") Choi et al., 2015]).
                |  spaCy v2.0, released in 2017, is more accurate than any of
                |  the systems Choi et al. evaluated.

            .o-inline-list
                +button("/usage/facts-figures#benchmarks", true, "secondary") See details

        +grid-col("half")
            //- Included partial is not visible from this file; presumably it
            //- renders the Choi et al. (2015) benchmark table — TODO confirm.
            include usage/_facts-figures/_benchmarks-choi-2015