mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Update global config and landing page
This commit is contained in:
parent
22dd929b65
commit
319fac14fe
|
@ -3,24 +3,22 @@
|
|||
"landing": true,
|
||||
"logos": [
|
||||
{
|
||||
"quora": [ "https://www.quora.com", 150 ],
|
||||
"chartbeat": [ "https://chartbeat.com", 200 ],
|
||||
"duedil": [ "https://www.duedil.com", 150 ],
|
||||
"stitchfix": [ "https://www.stitchfix.com", 190 ]
|
||||
"airbnb": [ "https://www.airbnb.com", 150, 45],
|
||||
"quora": [ "https://www.quora.com", 120, 34 ],
|
||||
"retriever": [ "https://www.retriever.no", 150, 33 ],
|
||||
"stitchfix": [ "https://www.stitchfix.com", 150, 18 ]
|
||||
},
|
||||
{
|
||||
"wayblazer": [ "http://wayblazer.com", 200 ],
|
||||
"indico": [ "https://indico.io", 150 ],
|
||||
"chattermill": [ "https://chattermill.io", 175 ],
|
||||
"turi": [ "https://turi.com", 150 ],
|
||||
"kip": [ "http://kipthis.com", 70 ]
|
||||
},
|
||||
"chartbeat": [ "https://chartbeat.com", 180, 25 ],
|
||||
"allenai": [ "https://allenai.org", 220, 37 ]
|
||||
}
|
||||
],
|
||||
"features": [
|
||||
{
|
||||
"socrata": [ "https://www.socrata.com", 150 ],
|
||||
"cytora": [ "http://www.cytora.com", 125 ],
|
||||
"signaln": [ "http://signaln.com", 150 ],
|
||||
"wonderflow": [ "http://www.wonderflow.co", 200 ],
|
||||
"synapsify": [ "http://www.gosynapsify.com", 150 ]
|
||||
"thoughtworks": ["https://www.thoughtworks.com/radar/tools", 150, 28],
|
||||
"wapo": ["https://www.washingtonpost.com/news/wonk/wp/2016/05/18/googles-new-artificial-intelligence-cant-understand-these-sentences-can-you/", 100, 77],
|
||||
"venturebeat": ["https://venturebeat.com/2017/01/27/4-ai-startups-that-analyze-customer-reviews/", 150, 19],
|
||||
"microsoft": ["https://www.microsoft.com/developerblog/2016/09/13/training-a-classifier-for-relation-extraction-from-medical-literature/", 130, 28]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
@ -34,7 +32,24 @@
|
|||
"landing": true
|
||||
},
|
||||
|
||||
"announcement" : {
|
||||
"title": "Important Announcement"
|
||||
"styleguide": {
|
||||
"title": "Styleguide",
|
||||
"sidebar": {
|
||||
"Styleguide": { "": "styleguide" },
|
||||
"Resources": {
|
||||
"Website Source": "https://github.com/explosion/spacy/tree/master/website",
|
||||
"Contributing Guide": "https://github.com/explosion/spaCy/blob/master/CONTRIBUTING.md"
|
||||
}
|
||||
},
|
||||
"menu": {
|
||||
"Introduction": "intro",
|
||||
"Logo": "logo",
|
||||
"Colors": "colors",
|
||||
"Typography": "typography",
|
||||
"Elements": "elements",
|
||||
"Components": "components",
|
||||
"Embeds": "embeds",
|
||||
"Markup Reference": "markup"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,12 +11,9 @@
|
|||
"COMPANY": "Explosion AI",
|
||||
"COMPANY_URL": "https://explosion.ai",
|
||||
"DEMOS_URL": "https://demos.explosion.ai",
|
||||
"MODELS_REPO": "explosion/spacy-models",
|
||||
|
||||
"SPACY_VERSION": "1.8",
|
||||
"LATEST_NEWS": {
|
||||
"url": "https://github.com/explosion/spaCy/releases/tag/v2.0.0-alpha",
|
||||
"title": "Test spaCy v2.0.0 alpha!"
|
||||
},
|
||||
"SPACY_VERSION": "2.0",
|
||||
|
||||
"SOCIAL": {
|
||||
"twitter": "spacy_io",
|
||||
|
@ -27,25 +24,23 @@
|
|||
},
|
||||
|
||||
"NAVIGATION": {
|
||||
"Home": "/",
|
||||
"Usage": "/docs/usage",
|
||||
"Reference": "/docs/api",
|
||||
"Demos": "/docs/usage/showcase",
|
||||
"Blog": "https://explosion.ai/blog"
|
||||
"Usage": "/usage",
|
||||
"Models": "/models",
|
||||
"API": "/api"
|
||||
},
|
||||
|
||||
"FOOTER": {
|
||||
"spaCy": {
|
||||
"Usage": "/docs/usage",
|
||||
"API Reference": "/docs/api",
|
||||
"Tutorials": "/docs/usage/tutorials",
|
||||
"Showcase": "/docs/usage/showcase"
|
||||
"Usage": "/usage",
|
||||
"Models": "/models",
|
||||
"API Reference": "/api",
|
||||
"Resources": "/usage/resources"
|
||||
},
|
||||
"Support": {
|
||||
"Issue Tracker": "https://github.com/explosion/spaCy/issues",
|
||||
"StackOverflow": "http://stackoverflow.com/questions/tagged/spacy",
|
||||
"Reddit usergroup": "https://www.reddit.com/r/spacynlp/",
|
||||
"Gitter chat": "https://gitter.im/explosion/spaCy"
|
||||
"Reddit Usergroup": "https://www.reddit.com/r/spacynlp/",
|
||||
"Gitter Chat": "https://gitter.im/explosion/spaCy"
|
||||
},
|
||||
"Connect": {
|
||||
"Twitter": "https://twitter.com/spacy_io",
|
||||
|
@ -74,21 +69,11 @@
|
|||
{"id": "venv", "title": "virtualenv", "help": "Use a virtual environment and install spaCy into a user directory" },
|
||||
{"id": "gpu", "title": "GPU", "help": "Run spaCy on GPU to make it faster. Requires an NVIDIA graphics card with CUDA 2+. See section below for more info."}]
|
||||
},
|
||||
{ "id": "model", "title": "Models", "multiple": true, "options": [
|
||||
{ "id": "en", "title": "English", "meta": "50MB" },
|
||||
{ "id": "de", "title": "German", "meta": "645MB" },
|
||||
{ "id": "fr", "title": "French", "meta": "1.33GB" },
|
||||
{ "id": "es", "title": "Spanish", "meta": "377MB"}]
|
||||
}
|
||||
{ "id": "model", "title": "Models", "multiple": true }
|
||||
],
|
||||
|
||||
"QUICKSTART_MODELS": [
|
||||
{ "id": "lang", "title": "Language", "options": [
|
||||
{ "id": "en", "title": "English", "checked": true },
|
||||
{ "id": "de", "title": "German" },
|
||||
{ "id": "fr", "title": "French" },
|
||||
{ "id": "es", "title": "Spanish" }]
|
||||
},
|
||||
{ "id": "lang", "title": "Language"},
|
||||
{ "id": "load", "title": "Loading style", "options": [
|
||||
{ "id": "spacy", "title": "Use spacy.load()", "checked": true, "help": "Use spaCy's built-in loader to load the model by name." },
|
||||
{ "id": "module", "title": "Import as module", "help": "Import the model explicitly as a Python module." }]
|
||||
|
@ -98,50 +83,15 @@
|
|||
}
|
||||
],
|
||||
|
||||
"MODELS": {
|
||||
"en": [
|
||||
{ "id": "en_core_web_sm", "lang": "English", "feats": [1, 1, 1, 1], "size": "50 MB", "license": "CC BY-SA", "def": true },
|
||||
{ "id": "en_core_web_md", "lang": "English", "feats": [1, 1, 1, 1], "size": "1 GB", "license": "CC BY-SA" },
|
||||
{ "id": "en_depent_web_md", "lang": "English", "feats": [1, 1, 1, 0], "size": "328 MB", "license": "CC BY-SA" },
|
||||
{ "id": "en_vectors_glove_md", "lang": "English", "feats": [1, 0, 0, 1], "size": "727 MB", "license": "CC BY-SA" }
|
||||
],
|
||||
"de": [
|
||||
{ "id": "de_core_news_md", "lang": "German", "feats": [1, 1, 1, 1], "size": "645 MB", "license": "CC BY-SA" }
|
||||
],
|
||||
"fr": [
|
||||
{ "id": "fr_depvec_web_lg", "lang": "French", "feats": [1, 1, 0, 1], "size": "1.33 GB", "license": "CC BY-NC" }
|
||||
],
|
||||
"es": [
|
||||
{ "id": "es_core_web_md", "lang": "Spanish", "feats": [1, 1, 1, 1], "size": "377 MB", "license": "CC BY-SA"}
|
||||
]
|
||||
},
|
||||
|
||||
"EXAMPLE_SENTENCES": {
|
||||
"en": "This is a sentence.",
|
||||
"de": "Dies ist ein Satz.",
|
||||
"fr": "C'est une phrase.",
|
||||
"es": "Esto es una frase."
|
||||
},
|
||||
|
||||
"ALPHA": true,
|
||||
"V_CSS": "1.6",
|
||||
"V_JS": "1.2",
|
||||
"V_CSS": "2.0",
|
||||
"V_JS": "2.0",
|
||||
"DEFAULT_SYNTAX": "python",
|
||||
"ANALYTICS": "UA-58931649-1",
|
||||
"MAILCHIMP": {
|
||||
"user": "spacy.us12",
|
||||
"id": "83b0498b1e7fa3c91ce68c3f1",
|
||||
"list": "89ad33e698"
|
||||
},
|
||||
"BADGES": {
|
||||
"pipy": {
|
||||
"badge": "https://img.shields.io/pypi/v/spacy.svg?style=flat-square",
|
||||
"link": "https://pypi.python.org/pypi/spacy"
|
||||
},
|
||||
"conda": {
|
||||
"badge": "https://anaconda.org/conda-forge/spacy/badges/version.svg",
|
||||
"link": "https://anaconda.org/conda-forge/spacy"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,61 +8,48 @@ include _includes/_mixins
|
|||
| Natural Language#[br]
|
||||
| Processing
|
||||
|
||||
h2.c-landing__title.o-block.u-heading-1
|
||||
| in Python
|
||||
h2.c-landing__title.o-block.u-heading-3
|
||||
span.u-text-label.u-text-label--light in Python
|
||||
|
||||
+landing-badge(gh("spaCy") + "/releases/tag/v2.0.0-alpha", "v2alpha", "Try spaCy v2.0.0 alpha!")
|
||||
+grid.o-content.c-landing__blocks
|
||||
+grid-col("third").c-landing__card.o-card.o-grid.o-grid--space
|
||||
+h(3) Fastest in the world
|
||||
p
|
||||
| spaCy excels at large-scale information extraction tasks.
|
||||
| It's written from the ground up in carefully memory-managed
|
||||
| Cython. Independent research has confirmed that spaCy is
|
||||
| the fastest in the world. If your application needs to
|
||||
| process entire web dumps, spaCy is the library you want to
|
||||
| be using.
|
||||
|
||||
+grid.o-content
|
||||
+grid-col("third").o-card
|
||||
+h(2) Fastest in the world
|
||||
p
|
||||
| spaCy excels at large-scale information extraction tasks.
|
||||
| It's written from the ground up in carefully memory-managed
|
||||
| Cython. Independent research has confirmed that spaCy is
|
||||
| the fastest in the world. If your application needs to
|
||||
| process entire web dumps, spaCy is the library you want to
|
||||
| be using.
|
||||
+button("/usage/facts-figures", true, "primary")
|
||||
| Facts & figures
|
||||
|
||||
+button("/docs/api", true, "primary")
|
||||
| Facts & figures
|
||||
+grid-col("third").c-landing__card.o-card.o-grid.o-grid--space
|
||||
+h(3) Get things done
|
||||
p
|
||||
| spaCy is designed to help you do real work — to build real
|
||||
| products, or gather real insights. The library respects
|
||||
| your time, and tries to avoid wasting it. It's easy to
|
||||
| install, and its API is simple and productive. We like to
|
||||
| think of spaCy as the Ruby on Rails of Natural Language
|
||||
| Processing.
|
||||
|
||||
+grid-col("third").o-card
|
||||
+h(2) Get things done
|
||||
p
|
||||
| spaCy is designed to help you do real work — to build real
|
||||
| products, or gather real insights. The library respects
|
||||
| your time, and tries to avoid wasting it. It's easy to
|
||||
| install, and its API is simple and productive. I like to
|
||||
| think of spaCy as the Ruby on Rails of Natural Language
|
||||
| Processing.
|
||||
+button("/usage", true, "primary")
|
||||
| Get started
|
||||
|
||||
+button("/docs/usage", true, "primary")
|
||||
| Get started
|
||||
+grid-col("third").c-landing__card.o-card.o-grid.o-grid--space
|
||||
+h(3) Deep learning
|
||||
p
|
||||
| spaCy is the best way to prepare text for deep learning.
|
||||
| It interoperates seamlessly with TensorFlow, PyTorch,
|
||||
| scikit-learn, Gensim and the
|
||||
| rest of Python's awesome AI ecosystem. spaCy helps you
|
||||
| connect the statistical models trained by these libraries
|
||||
| to the rest of your application.
|
||||
|
||||
+grid-col("third").o-card
|
||||
+h(2) Deep learning
|
||||
p
|
||||
| spaCy is the best way to prepare text for deep learning.
|
||||
| It interoperates seamlessly with
|
||||
| #[+a("https://www.tensorflow.org") TensorFlow],
|
||||
| #[+a("https://keras.io") Keras],
|
||||
| #[+a("http://scikit-learn.org") Scikit-Learn],
|
||||
| #[+a("https://radimrehurek.com/gensim") Gensim] and the
|
||||
| rest of Python's awesome AI ecosystem. spaCy helps you
|
||||
| connect the statistical models trained by these libraries
|
||||
| to the rest of your application.
|
||||
|
||||
+button("/docs/usage/deep-learning", true, "primary")
|
||||
| Read more
|
||||
|
||||
.o-inline-list.o-block.u-border-bottom.u-text-small.u-text-center.u-padding-small
|
||||
+a(gh("spaCy") + "/releases")
|
||||
strong.u-text-label.u-color-subtle #[+icon("code", 18)] Latest release:
|
||||
| v#{SPACY_VERSION}
|
||||
|
||||
if LATEST_NEWS
|
||||
+a(LATEST_NEWS.url) #[+tag.o-icon New!] #{LATEST_NEWS.title}
|
||||
+button("/usage/deep-learning", true, "primary")
|
||||
| Read more
|
||||
|
||||
.o-content
|
||||
+grid
|
||||
|
@ -92,67 +79,77 @@ include _includes/_mixins
|
|||
+h(2) Features
|
||||
+list
|
||||
+item Non-destructive #[strong tokenization]
|
||||
+item Syntax-driven sentence segmentation
|
||||
+item Support for #[strong #{LANG_COUNT}+ languages]
|
||||
+item #[strong #{MODEL_COUNT} statistical models] for #{MODEL_LANG_COUNT} languages
|
||||
+item Pre-trained #[strong word vectors]
|
||||
+item Easy #[strong deep learning] integration
|
||||
+item Part-of-speech tagging
|
||||
+item #[strong Named entity] recognition
|
||||
+item Labelled dependency parsing
|
||||
+item Syntax-driven sentence segmentation
|
||||
+item Built-in #[strong visualizers] for syntax and NER
|
||||
+item Convenient string-to-hash mapping
|
||||
+item Export to numpy data arrays
|
||||
+item GIL-free #[strong multi-threading]
|
||||
+item Efficient binary serialization
|
||||
+item Easy #[strong deep learning] integration
|
||||
+item Statistical models for #[strong English] and #[strong German]
|
||||
+item Easy #[strong model packaging] and deployment
|
||||
+item State-of-the-art speed
|
||||
+item Robust, rigorously evaluated accuracy
|
||||
|
||||
+landing-banner("Convolutional neural network models", "New in v2.0")
|
||||
p
|
||||
| spaCy v2.0 features new neural models for #[strong tagging],
|
||||
| #[strong parsing] and #[strong entity recognition]. The models have
|
||||
| been designed and implemented from scratch specifically for spaCy, to
|
||||
| give you an unmatched balance of speed, size and accuracy. A novel
|
||||
| bloom embedding strategy with subword features is used to support
|
||||
| huge vocabularies in tiny tables. Convolutional layers with residual
|
||||
| connections, layer normalization and maxout non-linearity are used,
|
||||
| giving much better efficiency than the standard BiLSTM solution.
|
||||
| Finally, the parser and NER use an imitation learning objective to
|
||||
| deliver accuracy in-line with the latest research systems,
|
||||
| even when evaluated from raw text. With these innovations, spaCy
|
||||
| v2.0's models are #[strong 10× smaller],
|
||||
| #[strong 20% more accurate], and #[strong just as fast] as the
|
||||
| previous generation.
|
||||
|
||||
.o-block-small.u-text-right
|
||||
+button("/models", true, "secondary-light") Download models
|
||||
|
||||
+landing-logos("spaCy is trusted by", logos)
|
||||
+button(gh("spacy") + "/stargazers", false, "secondary", "small")
|
||||
| and many more
|
||||
|
||||
+landing-logos("Featured on", features).o-block-small
|
||||
|
||||
+landing-banner("Prodigy: Radically efficient machine teaching", "From the makers of spaCy")
|
||||
p
|
||||
| Prodigy is an #[strong annotation tool] so efficient that data scientists can
|
||||
| do the annotation themselves, enabling a new level of rapid
|
||||
| iteration. Whether you're working on entity recognition, intent
|
||||
| detection or image classification, Prodigy can help you
|
||||
| #[strong train and evaluate] your models faster. Stream in your own examples or
|
||||
| real-world data from live APIs, update your model in real-time and
|
||||
| chain models together to build more complex systems.
|
||||
|
||||
.o-block-small.u-text-right
|
||||
+button("https://prodi.gy", true, "secondary-light") Try it out
|
||||
|
||||
.o-content
|
||||
+grid
|
||||
+grid-col("half")
|
||||
+h(2) Benchmarks
|
||||
|
||||
p
|
||||
| In 2015, independent researchers from Emory University and
|
||||
| Yahoo! Labs showed that spaCy offered the
|
||||
| #[strong fastest syntactic parser in the world] and that its
|
||||
| accuracy was #[strong within 1% of the best] available
|
||||
| (#[+a("https://aclweb.org/anthology/P/P15/P15-1038.pdf") Choi et al., 2015]).
|
||||
| spaCy v2.0, released in 2017, is more accurate than any of
|
||||
| the systems Choi et al. evaluated.
|
||||
|
||||
.o-inline-list
|
||||
+button("/docs/usage/lightning-tour", true, "secondary")
|
||||
| See examples
|
||||
+button("/usage/facts-figures#benchmarks", true, "secondary") See details
|
||||
|
||||
.o-block.u-text-center.u-padding
|
||||
h3.u-text-label.u-color-subtle.o-block spaCy is trusted by
|
||||
|
||||
each row in logos
|
||||
+grid("center").o-inline-list
|
||||
each details, name in row
|
||||
+a(details[0])
|
||||
img(src="/assets/img/logos/#{name}.png" alt=name width=(details[1] || 150)).u-padding-small
|
||||
|
||||
.u-pattern.u-padding
|
||||
+grid.o-card.o-content
|
||||
+grid-col("quarter")
|
||||
img(src="/assets/img/profile_matt.png" width="280")
|
||||
|
||||
+grid-col("three-quarters")
|
||||
+h(2) What's spaCy all about?
|
||||
|
||||
p
|
||||
| By 2014, I'd been publishing NLP research for about 10
|
||||
| years. During that time, I saw a huge gap open between the
|
||||
| technology that Google-sized companies could take to market,
|
||||
| and what was available to everyone else. This was especially
|
||||
| clear when companies started trying to use my research. Like
|
||||
| most researchers, my work was free to read, but expensive to
|
||||
| apply. You could run my code, but its requirements were
|
||||
| narrow. My code's mission in life was to print results
|
||||
| tables for my papers — it was good at this job, and bad at
|
||||
| all others.
|
||||
|
||||
p
|
||||
| spaCy's #[+a("/docs/api/philosophy") mission] is to make
|
||||
| cutting-edge NLP practical and commonly available. That's
|
||||
| why I left academia in 2014, to build a production-quality
|
||||
| open-source NLP library. It's why
|
||||
| #[+a("https://twitter.com/_inesmontani") Ines] joined the
|
||||
| project in 2015, to build visualisations, demos and
|
||||
| annotation tools that make NLP technologies less abstract
|
||||
| and easier to use. Together, we've founded
|
||||
| #[+a(COMPANY_URL, true) Explosion AI], to develop data packs
|
||||
| you can drop into spaCy to extend its capabilities. If
|
||||
| you're processing Hindi insurance claims, you need a model
|
||||
| for that. We can build it for you.
|
||||
|
||||
.o-block
|
||||
+a("https://twitter.com/honnibal")
|
||||
+svg("graphics", "matt-signature", 60, 45).u-color-theme
|
||||
+grid-col("half")
|
||||
include usage/_facts-figures/_benchmarks-choi-2015
|
||||
|
|
Loading…
Reference in New Issue
Block a user