// Source: spaCy/website/pages/index.tsx (TypeScript, 301 lines, 14 KiB)

import React from 'react'
import PropTypes from 'prop-types'
import {
LandingHeader,
LandingTitle,
LandingSubtitle,
LandingGrid,
LandingCard,
LandingCol,
LandingDemo,
LandingBannerGrid,
LandingBanner,
} from '../src/components/landing'
import { H2 } from '../src/components/typography'
import { InlineCode } from '../src/components/code'
import { Ul, Li } from '../src/components/list'
import Button from '../src/components/button'
import Link from '../src/components/link'
import QuickstartTraining from '../src/widgets/quickstart-training'
import Project from '../src/widgets/project'
import Features from '../src/widgets/features'
import Layout from '../src/templates'
import courseImage from '../public/images/course.jpg'
import prodigyImage from '../public/images/prodigy_overview.jpg'
import projectsImage from '../public/images/projects.png'
import tailoredPipelinesImage from '../public/images/spacy-tailored-pipelines_wide.png'
import { nightly, legacy } from '../meta/dynamicMeta.mjs'
import Benchmarks from '../docs/usage/_benchmarks-models.mdx'
/**
 * Build the Python snippet displayed in the landing page's editable demo.
 *
 * @param {boolean} nightly - When truthy, the install hint targets the
 *   `spacy-nightly` pre-release package; otherwise the stable `spacy` package.
 * @returns {string} Python source code ending with a trailing newline.
 */
function getCodeExample(nightly) {
    const installTarget = nightly ? 'spacy-nightly --pre' : 'spacy'
    // The template body is Python: its lines must stay flush left, and the
    // indentation of the `for` body is syntactically significant.
    return `# pip install -U ${installTarget}
# python -m spacy download en_core_web_sm
import spacy

# Load English tokenizer, tagger, parser and NER
nlp = spacy.load("en_core_web_sm")

# Process whole documents
text = ("When Sebastian Thrun started working on self-driving cars at "
        "Google in 2007, few people outside of the company took him "
        "seriously. “I can tell you very senior CEOs of major American "
        "car companies would shake my hand and turn away because I wasn’t "
        "worth talking to,” said Thrun, in an interview with Recode earlier "
        "this week.")
doc = nlp(text)

# Analyze syntax
print("Noun phrases:", [chunk.text for chunk in doc.noun_chunks])
print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])

# Find named entities, phrases and concepts
for entity in doc.ents:
    print(entity.text, entity.label_)
`
}
/**
 * Landing page of the website.
 *
 * Renders, inside the shared `Layout`, the header, three intro cards, the
 * editable code demo next to the feature list, promotional banners, the
 * training quickstart, the project template teaser and the benchmarks table.
 * The `nightly` and `legacy` flags imported from the dynamic meta module are
 * passed to the header, and `nightly` also selects the install command shown
 * in the demo snippet.
 */
const Landing = () => {
    const codeExample = getCodeExample(nightly)
    return (
        <Layout>
            <LandingHeader nightly={nightly} legacy={legacy}>
                <LandingTitle>
                    Industrial-Strength
                    <br />
                    Natural Language
                    <br />
                    Processing
                </LandingTitle>
                <LandingSubtitle>in Python</LandingSubtitle>
            </LandingHeader>

            {/* Intro cards */}
            <LandingGrid blocks>
                <LandingCard title="Get things done" url="/usage/spacy-101" button="Get started">
                    spaCy is designed to help you do real work — to build real products, or gather
                    real insights. The library respects your time, and tries to avoid wasting it.
                    It&apos;s easy to install, and its API is simple and productive.
                </LandingCard>
                <LandingCard
                    title="Blazing fast"
                    url="/usage/facts-figures"
                    button="Facts &amp; Figures"
                >
                    spaCy excels at large-scale information extraction tasks. It&apos;s written from
                    the ground up in carefully memory-managed Cython. If your application needs to
                    process entire web dumps, spaCy is the library you want to be using.
                </LandingCard>
                <LandingCard title="Awesome ecosystem" url="/usage/projects" button="Read more">
                    In the five years since its release, spaCy has become an industry standard with
                    a huge ecosystem. Choose from a variety of plugins, integrate with your machine
                    learning stack and build custom components and workflows.
                </LandingCard>
            </LandingGrid>

            {/* Editable demo and feature list */}
            <LandingGrid>
                <LandingDemo title="Edit the code &amp; try spaCy">{codeExample}</LandingDemo>
                <LandingCol>
                    <H2>Features</H2>
                    <Features />
                </LandingCol>
            </LandingGrid>

            {/* Banners: tailored pipelines and Prodigy */}
            <LandingBannerGrid>
                <LandingBanner
                    to="https://explosion.ai/custom-solutions"
                    button="Learn more"
                    background="#E4F4F9"
                    color="#1e1935"
                    small
                >
                    <p>
                        <Link to="https://explosion.ai/custom-solutions" hidden>
                            <img src={tailoredPipelinesImage.src} alt="spaCy Tailored Pipelines" />
                        </Link>
                    </p>
                    <p>
                        <strong>
                            Get a custom spaCy pipeline, tailor-made for your NLP problem by
                            spaCy&apos;s core developers.
                        </strong>
                    </p>
                    <Ul>
                        <Li emoji="🔥">
                            <strong>Streamlined.</strong> Nobody knows spaCy better than we do. Send
                            us your pipeline requirements and we&apos;ll be ready to start producing
                            your solution in no time at all.
                        </Li>
                        <Li emoji="🐿 ">
                            <strong>Production ready.</strong> spaCy pipelines are robust and easy
                            to deploy. You&apos;ll get a complete spaCy project folder which is
                            ready to <InlineCode>spacy project run</InlineCode>.
                        </Li>
                        <Li emoji="🔮">
                            <strong>Predictable.</strong> You&apos;ll know exactly what you&apos;re
                            going to get and what it&apos;s going to cost. We quote fees up-front,
                            let you try before you buy, and don&apos;t charge for over-runs at our
                            end &ndash; all the risk is on us.
                        </Li>
                        <Li emoji="🛠">
                            <strong>Maintainable.</strong> spaCy is an industry standard, and
                            we&apos;ll deliver your pipeline with full code, data, tests and
                            documentation, so your team can retrain, update and extend the solution
                            as your requirements change.
                        </Li>
                    </Ul>
                </LandingBanner>
                <LandingBanner
                    title="Prodigy: Radically efficient machine teaching"
                    label="From the makers of spaCy"
                    to="https://prodi.gy"
                    button="Try it out"
                    background="#f6f6f6"
                    color="#000"
                    small
                >
                    <p>
                        <Link to="https://prodi.gy" hidden>
                            {/** Update image */}
                            <img
                                src={prodigyImage.src}
                                alt="Prodigy: Radically efficient machine teaching"
                            />
                        </Link>
                    </p>
                    <p>
                        Prodigy is an <strong>annotation tool</strong> so efficient that data
                        scientists can do the annotation themselves, enabling a new level of rapid
                        iteration. Whether you&apos;re working on entity recognition, intent
                        detection or image classification, Prodigy can help you{' '}
                        <strong>train and evaluate</strong> your models faster.
                    </p>
                </LandingBanner>
            </LandingBannerGrid>

            {/* Training configuration quickstart */}
            <LandingGrid cols={2} style={{ gridTemplateColumns: '1fr calc(80ch + 14rem)' }}>
                <LandingCol>
                    <H2>Reproducible training for custom pipelines</H2>
                    <p>
                        spaCy v3.0 introduces a comprehensive and extensible system for{' '}
                        <strong>configuring your training runs</strong>. Your configuration file
                        will describe every detail of your training run, with no hidden defaults,
                        making it easy to <strong>rerun your experiments</strong> and track changes.
                        You can use the quickstart widget or the{' '}
                        <Link to="/api/cli#init-config">
                            <InlineCode>init config</InlineCode>
                        </Link>{' '}
                        command to get started, or clone a project template for an end-to-end
                        workflow.
                    </p>
                    <p>
                        <Button to="/usage/training">Get started</Button>
                    </p>
                </LandingCol>
                <LandingCol>
                    <QuickstartTraining />
                </LandingCol>
            </LandingGrid>

            {/* Project templates */}
            <LandingGrid cols={2}>
                <LandingCol>
                    <Link to="/usage/projects" hidden>
                        {/* The image is the link's only content, so it needs a text alternative. */}
                        <img src={projectsImage.src} alt="spaCy Projects" />
                    </Link>
                    <br />
                    <br />
                    <br />
                    <Project id="pipelines/tagger_parser_ud" title="Get started">
                        The easiest way to get started is to clone a project template and run it
                        &ndash; for example, this template for training a{' '}
                        <strong>part-of-speech tagger</strong> and{' '}
                        <strong>dependency parser</strong> on a Universal Dependencies treebank.
                    </Project>
                </LandingCol>
                <LandingCol>
                    <H2>End-to-end workflows from prototype to production</H2>
                    <p>
                        spaCy&apos;s new project system gives you a smooth path from prototype to
                        production. It lets you keep track of all those{' '}
                        <strong>data transformation</strong>, preprocessing and{' '}
                        <strong>training steps</strong>, so you can make sure your project is always
                        ready to hand over for automation. It features source asset download,
                        command execution, checksum verification, and caching with a variety of
                        backends and integrations.
                    </p>
                    <p>
                        <Button to="/usage/projects">Try it out</Button>
                    </p>
                </LandingCol>
            </LandingGrid>

            {/* Banners: v3.0 release notes and the online course */}
            <LandingBannerGrid>
                <LandingBanner
                    label="New in v3.0"
                    title="Transformer-based pipelines, new training system, project templates &amp; more"
                    to="/usage/v3"
                    button="See what's new"
                    small
                >
                    <p>
                        spaCy v3.0 features all new <strong>transformer-based pipelines</strong>{' '}
                        that bring spaCy&apos;s accuracy right up to the current{' '}
                        <strong>state-of-the-art</strong>. You can use any pretrained transformer to
                        train your own pipelines, and even share one transformer between multiple
                        components with <strong>multi-task learning</strong>. Training is now fully
                        configurable and extensible, and you can define your own custom models using{' '}
                        <strong>PyTorch</strong>, <strong>TensorFlow</strong> and other frameworks.
                    </p>
                </LandingBanner>
                <LandingBanner
                    to="https://course.spacy.io"
                    button="Start the course"
                    background="#f6f6f6"
                    color="#252a33"
                    small
                >
                    <p>
                        <Link to="https://course.spacy.io" hidden>
                            <img
                                src={courseImage.src}
                                alt="Advanced NLP with spaCy: A free online course"
                            />
                        </Link>
                    </p>
                    <p>
                        In this <strong>free and interactive online course</strong> you&apos;ll learn how
                        to use spaCy to build advanced natural language understanding systems, using
                        both rule-based and machine learning approaches. It includes{' '}
                        <strong>55 exercises</strong> featuring videos, slide decks, multiple-choice
                        questions and interactive coding practice in the browser.
                    </p>
                </LandingBanner>
            </LandingBannerGrid>

            {/* Benchmarks */}
            <LandingGrid cols={2} style={{ gridTemplateColumns: '1fr 60%' }}>
                <LandingCol>
                    <H2>Benchmarks</H2>
                    <p>
                        spaCy v3.0 introduces transformer-based pipelines that bring spaCy&apos;s
                        accuracy right up to the current <strong>state-of-the-art</strong>. You can
                        also use a CPU-optimized pipeline, which is less accurate but much cheaper
                        to run.
                    </p>
                    <p>
                        <Button to="/usage/facts-figures#benchmarks">More results</Button>
                    </p>
                </LandingCol>
                <LandingCol>
                    <Benchmarks />
                </LandingCol>
            </LandingGrid>
        </Layout>
    )
}

export default Landing