Update docs [ci skip]
|
@ -609,7 +609,6 @@ In addition to the native markdown elements, you can use the components
|
|||
├── docs # the actual markdown content
|
||||
├── meta # JSON-formatted site metadata
|
||||
| ├── languages.json # supported languages and statistical models
|
||||
| ├── logos.json # logos and links for landing page
|
||||
| ├── sidebars.json # sidebar navigations for different sections
|
||||
| ├── site.json # general site metadata
|
||||
| └── universe.json # data for the spaCy universe section
|
||||
|
|
|
@ -38,7 +38,7 @@ how the component should be configured. You can override its settings via the
|
|||
| `validate` | Whether patterns should be validated (passed to the `Matcher`). Defaults to `False`. ~~bool~~ |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/attributeruler.py
|
||||
%%GITHUB_SPACY/spacy/pipeline/attributeruler.py
|
||||
```
|
||||
|
||||
## AttributeRuler.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -230,12 +230,12 @@ $ python -m spacy convert [input_file] [output_dir] [--converter] [--file-type]
|
|||
### Converters {#converters}
|
||||
|
||||
| ID | Description |
|
||||
| ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `auto` | Automatically pick converter based on file extension and file content (default). |
|
||||
| `json` | JSON-formatted training data used in spaCy v2.x. |
|
||||
| `conll` | Universal Dependencies `.conllu` or `.conll` format. |
|
||||
| `ner` | NER with IOB/IOB2 tags, one token per line with columns separated by whitespace. The first column is the token and the final column is the IOB tag. Sentences are separated by blank lines and documents are separated by the line `-DOCSTART- -X- O O`. Supports CoNLL 2003 NER format. See [sample data](https://github.com/explosion/spaCy/tree/master/examples/training/ner_example_data). |
|
||||
| `iob` | NER with IOB/IOB2 tags, one sentence per line with tokens separated by whitespace and annotation separated by `|`, either `word|B-ENT` or `word|POS|B-ENT`. See [sample data](https://github.com/explosion/spaCy/tree/master/examples/training/ner_example_data). |
|
||||
| `ner` | NER with IOB/IOB2 tags, one token per line with columns separated by whitespace. The first column is the token and the final column is the IOB tag. Sentences are separated by blank lines and documents are separated by the line `-DOCSTART- -X- O O`. Supports CoNLL 2003 NER format. See [sample data](%%GITHUB_SPACY/extra/example_data/ner_example_data). |
|
||||
| `iob` | NER with IOB/IOB2 tags, one sentence per line with tokens separated by whitespace and annotation separated by `|`, either `word|B-ENT` or `word|POS|B-ENT`. See [sample data](%%GITHUB_SPACY/extra/example_data/ner_example_data). |
|
||||
|
||||
## debug {#debug new="3"}
|
||||
|
||||
|
@ -358,37 +358,37 @@ File /path/to/spacy/ml/models/tok2vec.py (line 207)
|
|||
Registry @loggers
|
||||
Name spacy.ConsoleLogger.v1
|
||||
Module spacy.training.loggers
|
||||
File /path/to/spacy/gold/loggers.py (line 8)
|
||||
File /path/to/spacy/training/loggers.py (line 8)
|
||||
ℹ [training.batcher]
|
||||
Registry @batchers
|
||||
Name spacy.batch_by_words.v1
|
||||
Module spacy.training.batchers
|
||||
File /path/to/spacy/gold/batchers.py (line 49)
|
||||
File /path/to/spacy/training/batchers.py (line 49)
|
||||
ℹ [training.batcher.size]
|
||||
Registry @schedules
|
||||
Name compounding.v1
|
||||
Module thinc.schedules
|
||||
File /Users/ines/Repos/explosion/thinc/thinc/schedules.py (line 43)
|
||||
File /path/to/thinc/thinc/schedules.py (line 43)
|
||||
ℹ [training.dev_corpus]
|
||||
Registry @readers
|
||||
Name spacy.Corpus.v1
|
||||
Module spacy.training.corpus
|
||||
File /path/to/spacy/gold/corpus.py (line 18)
|
||||
File /path/to/spacy/training/corpus.py (line 18)
|
||||
ℹ [training.optimizer]
|
||||
Registry @optimizers
|
||||
Name Adam.v1
|
||||
Module thinc.optimizers
|
||||
File /Users/ines/Repos/explosion/thinc/thinc/optimizers.py (line 58)
|
||||
File /path/to/thinc/thinc/optimizers.py (line 58)
|
||||
ℹ [training.optimizer.learn_rate]
|
||||
Registry @schedules
|
||||
Name warmup_linear.v1
|
||||
Module thinc.schedules
|
||||
File /Users/ines/Repos/explosion/thinc/thinc/schedules.py (line 91)
|
||||
File /path/to/thinc/thinc/schedules.py (line 91)
|
||||
ℹ [training.train_corpus]
|
||||
Registry @readers
|
||||
Name spacy.Corpus.v1
|
||||
Module spacy.training.corpus
|
||||
File /path/to/spacy/gold/corpus.py (line 18)
|
||||
File /path/to/spacy/training/corpus.py (line 18)
|
||||
```
|
||||
|
||||
</Accordion>
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
title: Corpus
|
||||
teaser: An annotated corpus
|
||||
tag: class
|
||||
source: spacy/gold/corpus.py
|
||||
source: spacy/training/corpus.py
|
||||
new: 3
|
||||
---
|
||||
|
||||
|
@ -42,7 +42,7 @@ streaming.
|
|||
| `limit` | Limit corpus to a subset of examples, e.g. for debugging. Defaults to `0` for no limit. ~~int~~ |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/gold/corpus.py
|
||||
%%GITHUB_SPACY/spacy/training/corpus.py
|
||||
```
|
||||
|
||||
## Corpus.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -24,11 +24,11 @@ With Cython there are four ways of declaring complex data types. Unfortunately
|
|||
we use all four in different places, as they all have different utility:
|
||||
|
||||
| Declaration | Description | Example |
|
||||
| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------- |
|
||||
| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------- |
|
||||
| `class` | A normal Python class. | [`Language`](/api/language) |
|
||||
| `cdef class` | A Python extension type. Differs from a normal Python class in that its attributes can be defined on the underlying struct. Can have C-level objects as attributes (notably structs and pointers), and can have methods which have C-level objects as arguments or return types. | [`Lexeme`](/api/cython-classes#lexeme) |
|
||||
| `cdef struct` | A struct is just a collection of variables, sort of like a named tuple, except the memory is contiguous. Structs can't have methods, only attributes. | [`LexemeC`](/api/cython-structs#lexemec) |
|
||||
| `cdef cppclass` | A C++ class. Like a struct, this can be allocated on the stack, but can have methods, a constructor and a destructor. Differs from `cdef class` in that it can be created and destroyed without acquiring the Python global interpreter lock. This style is the most obscure. | [`StateC`](https://github.com/explosion/spaCy/tree/master/spacy/syntax/_state.pxd) |
|
||||
| `cdef cppclass` | A C++ class. Like a struct, this can be allocated on the stack, but can have methods, a constructor and a destructor. Differs from `cdef class` in that it can be created and destroyed without acquiring the Python global interpreter lock. This style is the most obscure. | [`StateC`](%%GITHUB_SPACY/spacy/pipeline/_parser_internals/_state.pxd) |
|
||||
|
||||
The most important classes in spaCy are defined as `cdef class` objects. The
|
||||
underlying data for these objects is usually gathered into a struct, which is
|
||||
|
|
|
@ -37,7 +37,7 @@ recommended settings for your use case, check out the
|
|||
> guide on [registered functions](/usage/training#config-functions) for details.
|
||||
|
||||
```ini
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/default_config.cfg
|
||||
%%GITHUB_SPACY/spacy/default_config.cfg
|
||||
```
|
||||
|
||||
<Infobox title="Notes on data validation" emoji="💡">
|
||||
|
@ -45,8 +45,7 @@ https://github.com/explosion/spaCy/blob/develop/spacy/default_config.cfg
|
|||
Under the hood, spaCy's configs are powered by our machine learning library
|
||||
[Thinc's config system](https://thinc.ai/docs/usage-config), which uses
|
||||
[`pydantic`](https://github.com/samuelcolvin/pydantic/) for data validation
|
||||
based on type hints. See
|
||||
[`spacy/schemas.py`](https://github.com/explosion/spaCy/blob/develop/spacy/schemas.py)
|
||||
based on type hints. See [`spacy/schemas.py`](%%GITHUB_SPACY/spacy/schemas.py)
|
||||
for the schemas used to validate the default config. Arguments of registered
|
||||
functions are validated against their type annotations, if available. To debug
|
||||
your config and check that it's valid, you can run the
|
||||
|
@ -456,7 +455,7 @@ lexical data.
|
|||
Here's an example of the 20 most frequent lexemes in the English training data:
|
||||
|
||||
```json
|
||||
https://github.com/explosion/spaCy/tree/master/examples/training/vocab-data.jsonl
|
||||
%%GITHUB_SPACY/extra/example_data/vocab-data.jsonl
|
||||
```
|
||||
|
||||
## Pipeline meta {#meta}
|
||||
|
|
|
@ -57,7 +57,7 @@ architectures and their arguments and hyperparameters.
|
|||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/dep_parser.pyx
|
||||
%%GITHUB_SPACY/spacy/pipeline/dep_parser.pyx
|
||||
```
|
||||
|
||||
## DependencyParser.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -50,7 +50,7 @@ architectures and their arguments and hyperparameters.
|
|||
| `get_candidates` | Function that generates plausible candidates for a given `Span` object. Defaults to [CandidateGenerator](/api/architectures#CandidateGenerator), a function looking up exact, case-dependent aliases in the KB. ~~Callable[[KnowledgeBase, Span], Iterable[Candidate]]~~ |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/entity_linker.py
|
||||
%%GITHUB_SPACY/spacy/pipeline/entity_linker.py
|
||||
```
|
||||
|
||||
## EntityLinker.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -48,7 +48,7 @@ architectures and their arguments and hyperparameters.
|
|||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [TransitionBasedParser](/api/architectures#TransitionBasedParser). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/ner.pyx
|
||||
%%GITHUB_SPACY/spacy/pipeline/ner.pyx
|
||||
```
|
||||
|
||||
## EntityRecognizer.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -42,7 +42,7 @@ how the component should be configured. You can override its settings via the
|
|||
| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"||"`. ~~str~~ |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/entityruler.py
|
||||
%%GITHUB_SPACY/spacy/pipeline/entityruler.py
|
||||
```
|
||||
|
||||
## EntityRuler.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
title: Example
|
||||
teaser: A training instance
|
||||
tag: class
|
||||
source: spacy/gold/example.pyx
|
||||
source: spacy/training/example.pyx
|
||||
new: 3.0
|
||||
---
|
||||
|
||||
|
|
|
@ -945,10 +945,10 @@ available to the loaded object.
|
|||
## Class attributes {#class-attributes}
|
||||
|
||||
| Name | Description |
|
||||
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `Defaults` | Settings, data and factory methods for creating the `nlp` object and processing pipeline. ~~Defaults~~ |
|
||||
| `lang` | Two-letter language ID, i.e. [ISO code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). ~~str~~ |
|
||||
| `default_config` | Base [config](/usage/training#config) to use for [Language.config](/api/language#config). Defaults to [`default_config.cfg`](https://github.com/explosion/spaCy/tree/develop/spacy/default_config.cfg). ~~Config~~ |
|
||||
| `default_config` | Base [config](/usage/training#config) to use for [Language.config](/api/language#config). Defaults to [`default_config.cfg`](%%GITHUB_SPACY/spacy/default_config.cfg). ~~Config~~ |
|
||||
|
||||
## Defaults {#defaults}
|
||||
|
||||
|
@ -982,33 +982,16 @@ customize the default language data:
|
|||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `stop_words` | List of stop words, used for `Token.is_stop`.<br />**Example:** [`stop_words.py`][stop_words.py] ~~Set[str]~~ |
|
||||
| `tokenizer_exceptions` | Tokenizer exception rules, string mapped to list of token attributes.<br />**Example:** [`de/tokenizer_exceptions.py`][de/tokenizer_exceptions.py] ~~Dict[str, List[dict]]~~ |
|
||||
| `prefixes`, `suffixes`, `infixes` | Prefix, suffix and infix rules for the default tokenizer.<br />**Example:** [`puncutation.py`][punctuation.py] ~~Optional[List[Union[str, Pattern]]]~~ |
|
||||
| `token_match` | Optional regex for matching strings that should never be split, overriding the infix rules.<br />**Example:** [`fr/tokenizer_exceptions.py`][fr/tokenizer_exceptions.py] ~~Optional[Pattern]~~ |
|
||||
| `url_match` | Regular expression for matching URLs. Prefixes and suffixes are removed before applying the match.<br />**Example:** [`tokenizer_exceptions.py`][tokenizer_exceptions.py] ~~Optional[Pattern]~~ |
|
||||
| `lex_attr_getters` | Custom functions for setting lexical attributes on tokens, e.g. `like_num`.<br />**Example:** [`lex_attrs.py`][lex_attrs.py] ~~Dict[int, Callable[[str], Any]]~~ |
|
||||
| `syntax_iterators` | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks).<br />**Example:** [`syntax_iterators.py`][syntax_iterators.py]. ~~Dict[str, Callable[[Union[Doc, Span]], Iterator[Span]]]~~ |
|
||||
| `writing_system` | Information about the language's writing system, available via `Vocab.writing_system`. Defaults to: `{"direction": "ltr", "has_case": True, "has_letters": True}.`.<br />**Example:** [`zh/__init__.py`][zh/__init__.py] ~~Dict[str, Any]~~ |
|
||||
| `config` | Default [config](/usage/training#config) added to `nlp.config`. This can include references to custom tokenizers or lemmatizers.<br />**Example:** [`zh/__init__.py`][zh/__init__.py] ~~Config~~ |
|
||||
|
||||
[stop_words.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/stop_words.py
|
||||
[tokenizer_exceptions.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/tokenizer_exceptions.py
|
||||
[de/tokenizer_exceptions.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/de/tokenizer_exceptions.py
|
||||
[fr/tokenizer_exceptions.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/fr/tokenizer_exceptions.py
|
||||
[punctuation.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py
|
||||
[lex_attrs.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/lex_attrs.py
|
||||
[syntax_iterators.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/syntax_iterators.py
|
||||
[zh/__init__.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/zh/__init__.py
|
||||
| --------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `stop_words` | List of stop words, used for `Token.is_stop`.<br />**Example:** [`stop_words.py`](%%GITHUB_SPACY/spacy/lang/en/stop_words.py) ~~Set[str]~~ |
|
||||
| `tokenizer_exceptions` | Tokenizer exception rules, string mapped to list of token attributes.<br />**Example:** [`de/tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/de/tokenizer_exceptions.py) ~~Dict[str, List[dict]]~~ |
|
||||
| `prefixes`, `suffixes`, `infixes` | Prefix, suffix and infix rules for the default tokenizer.<br />**Example:** [`punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py) ~~Optional[List[Union[str, Pattern]]]~~ |
|
||||
| `token_match` | Optional regex for matching strings that should never be split, overriding the infix rules.<br />**Example:** [`fr/tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/fr/tokenizer_exceptions.py) ~~Optional[Pattern]~~ |
|
||||
| `url_match` | Regular expression for matching URLs. Prefixes and suffixes are removed before applying the match.<br />**Example:** [`tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/tokenizer_exceptions.py) ~~Optional[Pattern]~~ |
|
||||
| `lex_attr_getters` | Custom functions for setting lexical attributes on tokens, e.g. `like_num`.<br />**Example:** [`lex_attrs.py`](%%GITHUB_SPACY/spacy/lang/en/lex_attrs.py) ~~Dict[int, Callable[[str], Any]]~~ |
|
||||
| `syntax_iterators` | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks).<br />**Example:** [`syntax_iterators.py`](%%GITHUB_SPACY/spacy/lang/en/syntax_iterators.py). ~~Dict[str, Callable[[Union[Doc, Span]], Iterator[Span]]]~~ |
|
||||
| `writing_system` | Information about the language's writing system, available via `Vocab.writing_system`. Defaults to: `{"direction": "ltr", "has_case": True, "has_letters": True}`.<br />**Example:** [`zh/__init__.py`](%%GITHUB_SPACY/spacy/lang/zh/__init__.py) ~~Dict[str, Any]~~ |
|
||||
| `config` | Default [config](/usage/training#config) added to `nlp.config`. This can include references to custom tokenizers or lemmatizers.<br />**Example:** [`zh/__init__.py`](%%GITHUB_SPACY/spacy/lang/zh/__init__.py) ~~Config~~ |
|
||||
|
||||
## Serialization fields {#serialization-fields}
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ data formats used by the lookup and rule-based lemmatizers, see
|
|||
| `model` | **Not yet implemented:** the model to use. ~~Model~~ |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/lemmatizer.py
|
||||
%%GITHUB_SPACY/spacy/pipeline/lemmatizer.py
|
||||
```
|
||||
|
||||
## Lemmatizer.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -37,7 +37,7 @@ architectures and their arguments and hyperparameters.
|
|||
| `model` | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/morphologizer.pyx
|
||||
%%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx
|
||||
```
|
||||
|
||||
## Morphologizer.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -22,7 +22,7 @@ for how to use the `Pipe` base class to implement custom components.
|
|||
> inherit from `Pipe`.
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/pipe.pyx
|
||||
%%GITHUB_SPACY/spacy/pipeline/pipe.pyx
|
||||
```
|
||||
|
||||
## Pipe.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -34,7 +34,7 @@ architectures and their arguments and hyperparameters.
|
|||
| `model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/senter.pyx
|
||||
%%GITHUB_SPACY/spacy/pipeline/senter.pyx
|
||||
```
|
||||
|
||||
## SentenceRecognizer.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -33,7 +33,7 @@ how the component should be configured. You can override its settings via the
|
|||
| `punct_chars` | Optional custom list of punctuation characters that mark sentence ends. See below for defaults if not set. Defaults to `None`. ~~Optional[List[str]]~~ | `None` |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/sentencizer.pyx
|
||||
%%GITHUB_SPACY/spacy/pipeline/sentencizer.pyx
|
||||
```
|
||||
|
||||
## Sentencizer.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -34,7 +34,7 @@ architectures and their arguments and hyperparameters.
|
|||
| `model` | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/tagger.pyx
|
||||
%%GITHUB_SPACY/spacy/pipeline/tagger.pyx
|
||||
```
|
||||
|
||||
## Tagger.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -41,7 +41,7 @@ architectures and their arguments and hyperparameters.
|
|||
| `model` | A model instance that predicts scores for each category. Defaults to [TextCatEnsemble](/api/architectures#TextCatEnsemble). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/textcat.py
|
||||
%%GITHUB_SPACY/spacy/pipeline/textcat.py
|
||||
```
|
||||
|
||||
## TextCategorizer.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -45,7 +45,7 @@ architectures and their arguments and hyperparameters.
|
|||
| `model` | The model to use. Defaults to [HashEmbedCNN](/api/architectures#HashEmbedCNN). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||
|
||||
```python
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/pipeline/tok2vec.py
|
||||
%%GITHUB_SPACY/spacy/pipeline/tok2vec.py
|
||||
```
|
||||
|
||||
## Tok2Vec.\_\_init\_\_ {#init tag="method"}
|
||||
|
|
|
@ -105,8 +105,7 @@ your installation, installed pipelines and local setup from within spaCy.
|
|||
### spacy.explain {#spacy.explain tag="function"}
|
||||
|
||||
Get a description for a given POS tag, dependency label or entity type. For a
|
||||
list of available terms, see
|
||||
[`glossary.py`](https://github.com/explosion/spaCy/tree/master/spacy/glossary.py).
|
||||
list of available terms, see [`glossary.py`](%%GITHUB_SPACY/spacy/glossary.py).
|
||||
|
||||
> #### Example
|
||||
>
|
||||
|
@ -263,10 +262,10 @@ If a setting is not present in the options, the default value will be used.
|
|||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| --------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| --------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ |
|
||||
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
|
||||
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](https://github.com/explosion/spaCy/blob/master/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
|
||||
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
|
||||
|
||||
By default, displaCy comes with colors for all entity types used by
|
||||
[spaCy's trained pipelines](/models). If you're using custom entity types, you
|
||||
|
@ -348,7 +347,7 @@ See the [`Transformer`](/api/transformer) API reference and
|
|||
| [`span_getters`](/api/transformer#span_getters) | Registry for functions that take a batch of `Doc` objects and return a list of `Span` objects to process by the transformer, e.g. sentences. |
|
||||
| [`annotation_setters`](/api/transformer#annotation_setters) | Registry for functions that create annotation setters. Annotation setters are functions that take a batch of `Doc` objects and a [`FullTransformerBatch`](/api/transformer#fulltransformerbatch) and can set additional annotations on the `Doc`. |
|
||||
|
||||
## Loggers {#loggers source="spacy/gold/loggers.py" new="3"}
|
||||
## Loggers {#loggers source="spacy/training/loggers.py" new="3"}
|
||||
|
||||
A logger records the training results. When a logger is created, two functions
|
||||
are returned: one for logging the information for each training step, and a
|
||||
|
@ -452,7 +451,7 @@ remain in the config file stored on your local system.
|
|||
| `project_name` | The name of the project in the Weights & Biases interface. The project will be created automatically if it doesn't exist yet. ~~str~~ |
|
||||
| `remove_config_values` | A list of values to exclude from the config before it is uploaded to W&B (default: empty). ~~List[str]~~ |
|
||||
|
||||
## Batchers {#batchers source="spacy/gold/batchers.py" new="3"}
|
||||
## Batchers {#batchers source="spacy/training/batchers.py" new="3"}
|
||||
|
||||
A data batcher implements a batching strategy that essentially turns a stream of
|
||||
items into a stream of batches, with each batch consisting of one item or a list
|
||||
|
@ -536,7 +535,7 @@ sequences in the batch.
|
|||
| `discard_oversize` | Whether to discard sequences that are by themselves longer than the largest padded batch size. ~~bool~~ |
|
||||
| `get_length` | Optional function that receives a sequence item and returns its length. Defaults to the built-in `len()` if not set. ~~Optional[Callable[[Any], int]]~~ |
|
||||
|
||||
## Training data and alignment {#gold source="spacy/gold"}
|
||||
## Training data and alignment {#gold source="spacy/training"}
|
||||
|
||||
### training.biluo_tags_from_offsets {#biluo_tags_from_offsets tag="function"}
|
||||
|
||||
|
@ -616,12 +615,12 @@ token-based tags, e.g. to overwrite the `doc.ents`.
|
|||
## Utility functions {#util source="spacy/util.py"}
|
||||
|
||||
spaCy comes with a small collection of utility functions located in
|
||||
[`spacy/util.py`](https://github.com/explosion/spaCy/tree/master/spacy/util.py).
|
||||
Because utility functions are mostly intended for **internal use within spaCy**,
|
||||
their behavior may change with future releases. The functions documented on this
|
||||
page should be safe to use and we'll try to ensure backwards compatibility.
|
||||
However, we recommend having additional tests in place if your application
|
||||
depends on any of spaCy's utilities.
|
||||
[`spacy/util.py`](%%GITHUB_SPACY/spacy/util.py). Because utility functions are
|
||||
mostly intended for **internal use within spaCy**, their behavior may change
|
||||
with future releases. The functions documented on this page should be safe to
|
||||
use and we'll try to ensure backwards compatibility. However, we recommend
|
||||
having additional tests in place if your application depends on any of spaCy's
|
||||
utilities.
|
||||
|
||||
### util.get_lang_class {#util.get_lang_class tag="function"}
|
||||
|
||||
|
@ -833,8 +832,8 @@ Compile a sequence of prefix rules into a regex object.
|
|||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `entries` | The prefix rules, e.g. [`lang.punctuation.TOKENIZER_PREFIXES`](https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `entries` | The prefix rules, e.g. [`lang.punctuation.TOKENIZER_PREFIXES`](%%GITHUB_SPACY/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
|
||||
| **RETURNS** | The regex object to be used for [`Tokenizer.prefix_search`](/api/tokenizer#attributes). ~~Pattern~~ |
|
||||
|
||||
### util.compile_suffix_regex {#util.compile_suffix_regex tag="function"}
|
||||
|
@ -850,8 +849,8 @@ Compile a sequence of suffix rules into a regex object.
|
|||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `entries` | The suffix rules, e.g. [`lang.punctuation.TOKENIZER_SUFFIXES`](https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `entries` | The suffix rules, e.g. [`lang.punctuation.TOKENIZER_SUFFIXES`](%%GITHUB_SPACY/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
|
||||
| **RETURNS** | The regex object to be used for [`Tokenizer.suffix_search`](/api/tokenizer#attributes). ~~Pattern~~ |
|
||||
|
||||
### util.compile_infix_regex {#util.compile_infix_regex tag="function"}
|
||||
|
@ -867,8 +866,8 @@ Compile a sequence of infix rules into a regex object.
|
|||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `entries` | The infix rules, e.g. [`lang.punctuation.TOKENIZER_INFIXES`](https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
|
||||
| ----------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `entries` | The infix rules, e.g. [`lang.punctuation.TOKENIZER_INFIXES`](%%GITHUB_SPACY/spacy/lang/punctuation.py). ~~Iterable[Union[str, Pattern]]~~ |
|
||||
| **RETURNS** | The regex object to be used for [`Tokenizer.infix_finditer`](/api/tokenizer#attributes). ~~Pattern~~ |
|
||||
|
||||
### util.minibatch {#util.minibatch tag="function" new="2"}
|
||||
|
|
|
@ -31,7 +31,7 @@ supports all models that are available via the
|
|||
Usually you will connect subsequent components to the shared transformer using
|
||||
the [TransformerListener](/api/architectures#TransformerListener) layer. This
|
||||
works similarly to spaCy's [Tok2Vec](/api/tok2vec) component and
|
||||
[Tok2VecListener](/api/architectures/Tok2VecListener) sublayer.
|
||||
[Tok2VecListener](/api/architectures#Tok2VecListener) sublayer.
|
||||
|
||||
The component assigns the output of the transformer to the `Doc`'s extension
|
||||
attributes. We also calculate an alignment between the word-piece tokens and the
|
||||
|
|
BIN
website/docs/images/prodigy_overview.jpg
Normal file
After Width: | Height: | Size: 281 KiB |
BIN
website/docs/images/projects.png
Normal file
After Width: | Height: | Size: 99 KiB |
BIN
website/docs/images/wandb1.jpg
Normal file
After Width: | Height: | Size: 151 KiB |
BIN
website/docs/images/wandb2.jpg
Normal file
After Width: | Height: | Size: 146 KiB |
|
@ -2,9 +2,8 @@ Every language is different – and usually full of **exceptions and special
|
|||
cases**, especially amongst the most common words. Some of these exceptions are
|
||||
shared across languages, while others are **entirely specific** – usually so
|
||||
specific that they need to be hard-coded. The
|
||||
[`lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang) module
|
||||
contains all language-specific data, organized in simple Python files. This
|
||||
makes the data easy to update and extend.
|
||||
[`lang`](%%GITHUB_SPACY/spacy/lang) module contains all language-specific data,
|
||||
organized in simple Python files. This makes the data easy to update and extend.
|
||||
|
||||
The **shared language data** in the directory root includes rules that can be
|
||||
generalized across languages – for example, rules for basic punctuation, emoji,
|
||||
|
@ -23,27 +22,11 @@ values are defined in the [`Language.Defaults`](/api/language#defaults).
|
|||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ----------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| **Stop words**<br />[`stop_words.py`][stop_words.py] | List of most common words of a language that are often useful to filter out, for example "and" or "I". Matching tokens will return `True` for `is_stop`. |
|
||||
| **Tokenizer exceptions**<br />[`tokenizer_exceptions.py`][tokenizer_exceptions.py] | Special-case rules for the tokenizer, for example, contractions like "can't" and abbreviations with punctuation, like "U.K.". |
|
||||
| **Punctuation rules**<br />[`punctuation.py`][punctuation.py] | Regular expressions for splitting tokens, e.g. on punctuation or special characters like emoji. Includes rules for prefixes, suffixes and infixes. |
|
||||
| **Character classes**<br />[`char_classes.py`][char_classes.py] | Character classes to be used in regular expressions, for example, Latin characters, quotes, hyphens or icons. |
|
||||
| **Lexical attributes**<br />[`lex_attrs.py`][lex_attrs.py] | Custom functions for setting lexical attributes on tokens, e.g. `like_num`, which includes language-specific words like "ten" or "hundred". |
|
||||
| **Syntax iterators**<br />[`syntax_iterators.py`][syntax_iterators.py] | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks). |
|
||||
| **Lemmatizer**<br />[`lemmatizer.py`][lemmatizer.py] [`spacy-lookups-data`][spacy-lookups-data] | Custom lemmatizer implementation and lemmatization tables. |
|
||||
|
||||
[stop_words.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/stop_words.py
|
||||
[tokenizer_exceptions.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/de/tokenizer_exceptions.py
|
||||
[punctuation.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/punctuation.py
|
||||
[char_classes.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/char_classes.py
|
||||
[lex_attrs.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/lex_attrs.py
|
||||
[syntax_iterators.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/en/syntax_iterators.py
|
||||
[lemmatizer.py]:
|
||||
https://github.com/explosion/spaCy/tree/master/spacy/lang/fr/lemmatizer.py
|
||||
[spacy-lookups-data]: https://github.com/explosion/spacy-lookups-data
|
||||
| ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| **Stop words**<br />[`stop_words.py`](%%GITHUB_SPACY/spacy/lang/en/stop_words.py) | List of most common words of a language that are often useful to filter out, for example "and" or "I". Matching tokens will return `True` for `is_stop`. |
|
||||
| **Tokenizer exceptions**<br />[`tokenizer_exceptions.py`](%%GITHUB_SPACY/spacy/lang/de/tokenizer_exceptions.py) | Special-case rules for the tokenizer, for example, contractions like "can't" and abbreviations with punctuation, like "U.K.". |
|
||||
| **Punctuation rules**<br />[`punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py) | Regular expressions for splitting tokens, e.g. on punctuation or special characters like emoji. Includes rules for prefixes, suffixes and infixes. |
|
||||
| **Character classes**<br />[`char_classes.py`](%%GITHUB_SPACY/spacy/lang/char_classes.py) | Character classes to be used in regular expressions, for example, Latin characters, quotes, hyphens or icons. |
|
||||
| **Lexical attributes**<br />[`lex_attrs.py`](%%GITHUB_SPACY/spacy/lang/en/lex_attrs.py) | Custom functions for setting lexical attributes on tokens, e.g. `like_num`, which includes language-specific words like "ten" or "hundred". |
|
||||
| **Syntax iterators**<br />[`syntax_iterators.py`](%%GITHUB_SPACY/spacy/lang/en/syntax_iterators.py) | Functions that compute views of a `Doc` object based on its syntax. At the moment, only used for [noun chunks](/usage/linguistic-features#noun-chunks). |
|
||||
| **Lemmatizer**<br />[`lemmatizer.py`](%%GITHUB_SPACY/spacy/lang/fr/lemmatizer.py) [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) | Custom lemmatizer implementation and lemmatization tables. |
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
import { Help } from 'components/typography'
|
||||
|
||||
| System | Year | Language | Accuracy | Speed (wps) |
|
||||
| -------------- | ---- | --------------- | -------: | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| **spaCy v2.x** | 2017 | Python / Cython | **92.6** | _n/a_ <Help>This table shows speed as benchmarked by Choi et al. We therefore can't provide comparable figures, as we'd be running the benchmark on different hardware.</Help> |
|
||||
| **spaCy v1.x** | 2015 | Python / Cython | 91.8 | 13,963 |
|
||||
| ClearNLP | 2015 | Java | 91.7 | 10,271 |
|
||||
| CoreNLP | 2015 | Java | 89.6 | 8,602 |
|
||||
| MATE | 2015 | Java | 92.5 | 550 |
|
||||
| Turbo | 2015 | C++ | 92.4 | 349 |
|
44
website/docs/usage/_benchmarks-models.md
Normal file
|
@ -0,0 +1,44 @@
|
|||
import { Help } from 'components/typography'; import Link from 'components/link'
|
||||
|
||||
<!-- TODO: update, add project template -->
|
||||
|
||||
<figure>
|
||||
|
||||
| System | Parser | Tagger | NER | WPS<br />CPU <Help>words per second on CPU, higher is better</Help> | WPS<br/>GPU <Help>words per second on GPU, higher is better</Help> |
|
||||
| ------------------------------------------------------------------------- | ----------------: | ----------------: | ---: | ------------------------------------------------------------------: | -----------------------------------------------------------------: |
|
||||
| [`en_core_web_trf`](/models/en#en_core_web_trf) (spaCy v3) | | | | | 6k |
|
||||
| [`en_core_web_lg`](/models/en#en_core_web_lg) (spaCy v3) | | | | | |
|
||||
| `en_core_web_lg` (spaCy v2) | 91.9 | 97.2 | 85.9 | 10k | |
|
||||
| [Stanza](https://stanfordnlp.github.io/stanza/) (StanfordNLP)<sup>1</sup> | _n/a_<sup>2</sup> | _n/a_<sup>2</sup> | 88.8 | 234 | 2k |
|
||||
| <Link to="https://github.com/flairNLP/flair" hideIcon>Flair</Link> | - | 97.9 | 89.3 | | |
|
||||
|
||||
<figcaption class="caption">
|
||||
|
||||
**Accuracy and speed on the
|
||||
[OntoNotes 5.0](https://catalog.ldc.upenn.edu/LDC2013T19) corpus.**<br />**1.**
|
||||
[Qi et al. (2020)](https://arxiv.org/pdf/2003.07082.pdf). **2.** _Coming soon_:
|
||||
Qi et al. don't report parsing and tagging results on OntoNotes. We're working
|
||||
on training Stanza on this corpus to allow direct comparison.
|
||||
|
||||
</figcaption>
|
||||
|
||||
</figure>
|
||||
|
||||
<figure>
|
||||
|
||||
| System | POS | UAS | LAS |
|
||||
| ------------------------------------------------------------------------------ | ---: | ---: | ---: |
|
||||
| spaCy RoBERTa (2020) | | | |
|
||||
| spaCy CNN (2020) | | | |
|
||||
| [Mrini et al.](https://khalilmrini.github.io/Label_Attention_Layer.pdf) (2019) | 97.3 | 97.4 | 96.3 |
|
||||
| [Zhou and Zhao](https://www.aclweb.org/anthology/P19-1230/) (2019) | 97.3 | 97.2 | 95.7 |
|
||||
|
||||
<figcaption class="caption">
|
||||
|
||||
**Accuracy on the Penn Treebank.** See
|
||||
[NLP-progress](http://nlpprogress.com/english/dependency_parsing.html) for more
|
||||
results.
|
||||
|
||||
</figcaption>
|
||||
|
||||
</figure>
|
|
@ -579,12 +579,17 @@ def MyCustomVectors(
|
|||
|
||||
## Pretraining {#pretraining}
|
||||
|
||||
<Infobox title="This section is still under construction" emoji="🚧" variant="warning">
|
||||
</Infobox>
|
||||
|
||||
<!--
|
||||
- explain general concept and idea (short!)
|
||||
- present it as a separate lightweight mechanism for pretraining the tok2vec
|
||||
layer
|
||||
- advantages (could also be pros/cons table)
|
||||
- explain how it generates a separate file (!) and how it depends on the same
|
||||
vectors
|
||||
-->
|
||||
|
||||
> #### Raw text format
|
||||
>
|
||||
|
|
|
@ -5,254 +5,55 @@ next: /usage/spacy-101
|
|||
menu:
|
||||
- ['Feature Comparison', 'comparison']
|
||||
- ['Benchmarks', 'benchmarks']
|
||||
# TODO: - ['Citing spaCy', 'citation']
|
||||
---
|
||||
|
||||
## Feature comparison {#comparison}
|
||||
## Comparison {#comparison hidden="true"}
|
||||
|
||||
Here's a quick comparison of the functionalities offered by spaCy,
|
||||
[NLTK](http://www.nltk.org/py-modindex.html) and
|
||||
[CoreNLP](http://stanfordnlp.github.io/CoreNLP/).
|
||||
### When should I use spaCy? {#comparison-usage}
|
||||
|
||||
| | spaCy | NLTK | CoreNLP |
|
||||
| ----------------------- | :----: | :----: | :-----------: |
|
||||
| Programming language | Python | Python | Java / Python |
|
||||
| Neural network models | ✅ | ❌ | ✅ |
|
||||
| Integrated word vectors | ✅ | ❌ | ❌ |
|
||||
| Multi-language support | ✅ | ✅ | ✅ |
|
||||
| Tokenization | ✅ | ✅ | ✅ |
|
||||
| Part-of-speech tagging | ✅ | ✅ | ✅ |
|
||||
| Sentence segmentation | ✅ | ✅ | ✅ |
|
||||
| Dependency parsing | ✅ | ❌ | ✅ |
|
||||
| Entity recognition | ✅ | ✅ | ✅ |
|
||||
| Entity linking | ✅ | ❌ | ❌ |
|
||||
| Coreference resolution | ❌ | ❌ | ✅ |
|
||||
<!-- TODO: update -->
|
||||
|
||||
### When should I use what? {#comparison-usage}
|
||||
|
||||
Natural Language Understanding is an active area of research and development, so
|
||||
there are many different tools or technologies catering to different use-cases.
|
||||
The table below summarizes a few libraries (spaCy,
|
||||
[NLTK](http://www.nltk.org/py-modindex.html), [AllenNLP](https://allennlp.org/),
|
||||
[StanfordNLP](https://stanfordnlp.github.io/stanfordnlp/) and
|
||||
[TensorFlow](https://www.tensorflow.org/)) to help you get a feel for how things fit
|
||||
together.
|
||||
|
||||
| | spaCy | NLTK | Allen-<br />NLP | Stanford-<br />NLP | Tensor-<br />Flow |
|
||||
| ----------------------------------------------------------------- | :---: | :--: | :-------------: | :----------------: | :---------------: |
|
||||
| I'm a beginner and just getting started with NLP. | ✅ | ✅ | ❌ | ✅ | ❌ |
|
||||
| I want to build an end-to-end production application. | ✅ | ❌ | ❌ | ❌ | ✅ |
|
||||
| I want to try out different neural network architectures for NLP. | ❌ | ❌ | ✅ | ❌ | ✅ |
|
||||
| I want to try the latest models with state-of-the-art accuracy. | ❌ | ❌ | ✅ | ✅ | ✅ |
|
||||
| I want to train models from my own data. | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||
| I want my application to be efficient on CPU. | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| Use Cases |
|
||||
| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| ✅ **I'm a beginner and just getting started with NLP.**<br />spaCy makes it easy to get started and comes with extensive documentation, including a beginner-friendly [101 guide](/usage/spacy-101) and a free interactive [online course](https://course.spacy.io). |
|
||||
| ✅ **I want to build an end-to-end production application.** |
|
||||
| ✅ **I want my application to be efficient on CPU.**<br />While spaCy lets you train modern NLP models that are best run on GPU, it also offers CPU-optimized pipelines, which may be less accurate but much cheaper to run. |
|
||||
| ✅ **I want to try out different neural network architectures for NLP.** |
|
||||
| ❌ **I want to build a language generation application.**<br />spaCy's focus is natural language _processing_ and extracting information from large volumes of text. While you can use it to help you re-write existing text, it doesn't include any specific functionality for language generation tasks. |
|
||||
| ❌ **I want to research machine learning algorithms.** |
|
||||
|
||||
## Benchmarks {#benchmarks}
|
||||
|
||||
Two peer-reviewed papers in 2015 confirmed that spaCy offers the **fastest
|
||||
syntactic parser in the world** and that **its accuracy is within 1% of the
|
||||
best** available. The few systems that are more accurate are 20× slower or more.
|
||||
spaCy v3.0 introduces transformer-based pipelines that bring spaCy's accuracy
|
||||
right up to **current state-of-the-art**. You can also use a CPU-optimized
|
||||
pipeline, which is less accurate but much cheaper to run.
|
||||
|
||||
> #### About the evaluation
|
||||
<!-- TODO: -->
|
||||
|
||||
> #### Evaluation details
|
||||
>
|
||||
> The first of the evaluations was published by **Yahoo! Labs** and **Emory
|
||||
> University**, as part of a survey of current parsing technologies
|
||||
> ([Choi et al., 2015](https://aclweb.org/anthology/P/P15/P15-1038.pdf)). Their
|
||||
> results and subsequent discussions helped us develop a novel
|
||||
> psychologically-motivated technique to improve spaCy's accuracy, which we
|
||||
> published in joint work with Macquarie University
|
||||
> ([Honnibal and Johnson, 2015](https://www.aclweb.org/anthology/D/D15/D15-1162.pdf)).
|
||||
> - **OntoNotes 5.0:** spaCy's English models are trained on this corpus, as
|
||||
> it's several times larger than other English treebanks. However, most
|
||||
> systems do not report accuracies on it.
|
||||
> - **Penn Treebank:** The "classic" parsing evaluation for research. However,
|
||||
> it's quite far removed from actual usage: it uses sentences with
|
||||
> gold-standard segmentation and tokenization, from a pretty specific type of
|
||||
> text (articles from a single newspaper, 1984-1989).
|
||||
|
||||
import BenchmarksChoi from 'usage/\_benchmarks-choi.md'
|
||||
import Benchmarks from 'usage/\_benchmarks-models.md'
|
||||
|
||||
<BenchmarksChoi />
|
||||
<Benchmarks />
|
||||
|
||||
### Algorithm comparison {#algorithm}
|
||||
<!-- TODO: update -->
|
||||
|
||||
In this section, we compare spaCy's algorithms to recently published systems,
|
||||
using some of the most popular benchmarks. These benchmarks are designed to help
|
||||
isolate the contributions of specific algorithmic decisions, so they promote
|
||||
slightly "idealized" conditions. Specifically, the text comes pre-processed with
|
||||
"gold standard" token and sentence boundaries. The data sets also tend to be
|
||||
fairly small, to help researchers iterate quickly. These conditions mean the
|
||||
models trained on these data sets are not always useful for practical purposes.
|
||||
<Project id="benchmarks/penn_treebank">
|
||||
|
||||
#### Parse accuracy (Penn Treebank / Wall Street Journal) {#parse-accuracy-penn}
|
||||
The easiest way to reproduce spaCy's benchmarks on the Penn Treebank is to clone
|
||||
our project template.
|
||||
|
||||
This is the "classic" evaluation, so it's the number parsing researchers are
|
||||
most easily able to put in context. However, it's quite far removed from actual
|
||||
usage: it uses sentences with gold-standard segmentation and tokenization, from
|
||||
a pretty specific type of text (articles from a single newspaper, 1984-1989).
|
||||
</Project>
|
||||
|
||||
> #### Methodology
|
||||
>
|
||||
> [Andor et al. (2016)](http://arxiv.org/abs/1603.06042) chose slightly
|
||||
> different experimental conditions from
|
||||
> [Choi et al. (2015)](https://aclweb.org/anthology/P/P15/P15-1038.pdf), so the
|
||||
> two accuracy tables here do not present directly comparable figures.
|
||||
<!-- ## Citing spaCy {#citation}
|
||||
|
||||
| System | Year | Type | Accuracy |
|
||||
| ------------------------------------------------------------ | ---- | ------ | --------: |
|
||||
| spaCy v2.0.0 | 2017 | neural | 94.48 |
|
||||
| spaCy v1.1.0 | 2016 | linear | 92.80 |
|
||||
| [Dozat and Manning][dozat and manning] | 2017 | neural | **95.75** |
|
||||
| [Andor et al.][andor et al.] | 2016 | neural | 94.44 |
|
||||
| [SyntaxNet Parsey McParseface][syntaxnet parsey mcparseface] | 2016 | neural | 94.15 |
|
||||
| [Weiss et al.][weiss et al.] | 2015 | neural | 93.91 |
|
||||
| [Zhang and McDonald][zhang and mcdonald] | 2014 | linear | 93.32 |
|
||||
| [Martins et al.][martins et al.] | 2013 | linear | 93.10 |
|
||||
|
||||
[dozat and manning]: https://arxiv.org/pdf/1611.01734.pdf
|
||||
[andor et al.]: http://arxiv.org/abs/1603.06042
|
||||
[syntaxnet parsey mcparseface]:
|
||||
https://github.com/tensorflow/models/tree/master/research/syntaxnet
|
||||
[weiss et al.]:
|
||||
http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43800.pdf
|
||||
[zhang and mcdonald]: http://research.google.com/pubs/archive/38148.pdf
|
||||
[martins et al.]: http://www.cs.cmu.edu/~ark/TurboParser/
|
||||
|
||||
#### NER accuracy (OntoNotes 5, no pre-process) {#ner-accuracy-ontonotes5}
|
||||
|
||||
This is the evaluation we use to tune spaCy's parameters to decide which
|
||||
algorithms are better than the others. It's reasonably close to actual usage,
|
||||
because it requires the parses to be produced from raw text, without any
|
||||
pre-processing.
|
||||
|
||||
| System | Year | Type | Accuracy |
|
||||
| -------------------------------------------------- | ---- | ------ | --------: |
|
||||
| spaCy [`en_core_web_lg`][en_core_web_lg] v2.0.0a3 | 2017 | neural | 85.85 |
|
||||
| [Strubell et al.][strubell et al.] | 2017 | neural | **86.81** |
|
||||
| [Chiu and Nichols][chiu and nichols] | 2016 | neural | 86.19 |
|
||||
| [Durrett and Klein][durrett and klein] | 2014 | neural | 84.04 |
|
||||
| [Ratinov and Roth][ratinov and roth] | 2009 | linear | 83.45 |
|
||||
|
||||
[en_core_web_lg]: /models/en#en_core_web_lg
|
||||
[strubell et al.]: https://arxiv.org/pdf/1702.02098.pdf
|
||||
[chiu and nichols]:
|
||||
https://www.semanticscholar.org/paper/Named-Entity-Recognition-with-Bidirectional-LSTM-C-Chiu-Nichols/10a4db59e81d26b2e0e896d3186ef81b4458b93f
|
||||
[durrett and klein]:
|
||||
https://www.semanticscholar.org/paper/A-Joint-Model-for-Entity-Analysis-Coreference-Typi-Durrett-Klein/28eb033eee5f51c5e5389cbb6b777779203a6778
|
||||
[ratinov and roth]: http://www.aclweb.org/anthology/W09-1119
|
||||
|
||||
### Model comparison {#spacy-models}
|
||||
|
||||
In this section, we provide benchmark accuracies for the pretrained model
|
||||
pipelines we distribute with spaCy. Evaluations are conducted end-to-end from
|
||||
raw text, with no "gold standard" pre-processing, over text from a mix of genres
|
||||
where possible.
|
||||
|
||||
> #### Methodology
|
||||
>
|
||||
> The evaluation was conducted on raw text with no gold standard information.
|
||||
> The parser, tagger and entity recognizer were trained on the
|
||||
> [OntoNotes 5](https://www.gabormelli.com/RKB/OntoNotes_Corpus) corpus, the
|
||||
> word vectors on [Common Crawl](http://commoncrawl.org).
|
||||
|
||||
#### English {#benchmarks-models-english}
|
||||
|
||||
| Model | spaCy | Type | UAS | NER F | POS | WPS | Size |
|
||||
| --------------------------------------------------- | ----- | ------ | -------: | -------: | -------: | --------: | -------: |
|
||||
| [`en_core_web_sm`](/models/en#en_core_web_sm) 2.0.0 | 2.x | neural | 91.7 | 85.3 | 97.0 | 10.1k | **35MB** |
|
||||
| [`en_core_web_md`](/models/en#en_core_web_md) 2.0.0 | 2.x | neural | 91.7 | **85.9** | 97.1 | 10.0k | 115MB |
|
||||
| [`en_core_web_lg`](/models/en#en_core_web_lg) 2.0.0 | 2.x | neural | **91.9** | **85.9** | **97.2** | 10.0k | 812MB |
|
||||
| `en_core_web_sm` 1.2.0 | 1.x | linear | 86.6 | 78.5 | 96.6 | **25.7k** | 50MB |
|
||||
| `en_core_web_md` 1.2.1 | 1.x | linear | 90.6 | 81.4 | 96.7 | 18.8k | 1GB |
|
||||
|
||||
#### Spanish {#benchmarks-models-spanish}
|
||||
|
||||
> #### Evaluation note
|
||||
>
|
||||
> The NER accuracy refers to the "silver standard" annotations in the WikiNER
|
||||
> corpus. Accuracy on these annotations tends to be higher than correct human
|
||||
> annotations.
|
||||
|
||||
| Model | spaCy | Type | UAS | NER F | POS | WPS | Size |
|
||||
| ----------------------------------------------------- | ----- | ------ | -------: | -------: | -------: | ----: | -------: |
|
||||
| [`es_core_news_sm`](/models/es#es_core_news_sm) 2.0.0 | 2.x | neural | 89.8 | 88.7 | **96.9** | _n/a_ | **35MB** |
|
||||
| [`es_core_news_md`](/models/es#es_core_news_md) 2.0.0 | 2.x | neural | **90.2** | 89.0 | 97.8 | _n/a_ | 93MB |
|
||||
| `es_core_web_md` 1.1.0 | 1.x | linear | 87.5 | **94.2** | 96.7 | _n/a_ | 377MB |
|
||||
|
||||
### Detailed speed comparison {#speed-comparison}
|
||||
|
||||
Here we compare the per-document processing time of various spaCy
|
||||
functionalities against other NLP libraries. We show both absolute timings (in
|
||||
ms) and relative performance (normalized to spaCy). Lower is better.
|
||||
|
||||
<Infobox title="Important note" variant="warning">
|
||||
|
||||
This evaluation was conducted in 2015. We're working on benchmarks on current
|
||||
CPU and GPU hardware. In the meantime, we're grateful to the Stanford folks for
|
||||
drawing our attention to what seems to be
|
||||
[a long-standing error](https://nlp.stanford.edu/software/tokenizer.html#Speed)
|
||||
in our CoreNLP benchmarks, especially for their tokenizer. Until we run
|
||||
corrected experiments, we have updated the table using their figures.
|
||||
|
||||
</Infobox>
|
||||
|
||||
> #### Methodology
|
||||
>
|
||||
> - **Set up:** 100,000 plain-text documents were streamed from an SQLite3
|
||||
> database, and processed with an NLP library, to one of three levels of
|
||||
> detail — tokenization, tagging, or parsing. The tasks are additive: to parse
|
||||
> the text you have to tokenize and tag it. The pre-processing was not
|
||||
> subtracted from the times — we report the time required for the pipeline to
|
||||
> complete. We report mean times per document, in milliseconds.
|
||||
> - **Hardware**: Intel i7-3770 (2012)
|
||||
> - **Implementation**:
|
||||
> [`spacy-benchmarks`](https://github.com/explosion/spacy-benchmarks)
|
||||
|
||||
<Table>
|
||||
<thead>
|
||||
<Tr>
|
||||
<Th></Th>
|
||||
<Th colSpan="3">Absolute (ms per doc)</Th>
|
||||
<Th colSpan="3">Relative (to spaCy)</Th>
|
||||
</Tr>
|
||||
<Tr>
|
||||
<Th>System</Th>
|
||||
<Th>Tokenize</Th>
|
||||
<Th>Tag</Th>
|
||||
<Th>Parse</Th>
|
||||
<Th>Tokenize</Th>
|
||||
<Th>Tag</Th>
|
||||
<Th>Parse</Th>
|
||||
</Tr>
|
||||
</thead>
|
||||
<tbody style="text-align: right">
|
||||
<Tr>
|
||||
<Td style="text-align: left"><strong>spaCy</strong></Td>
|
||||
<Td>0.2ms</Td>
|
||||
<Td>1ms</Td>
|
||||
<Td>19ms</Td>
|
||||
<Td>1x</Td>
|
||||
<Td>1x</Td>
|
||||
<Td>1x</Td>
|
||||
</Tr>
|
||||
<Tr>
|
||||
<Td style="text-align: left">CoreNLP</Td>
|
||||
<Td>0.18ms</Td>
|
||||
<Td>10ms</Td>
|
||||
<Td>49ms</Td>
|
||||
<Td>0.9x</Td>
|
||||
<Td>10x</Td>
|
||||
<Td>2.6x</Td>
|
||||
</Tr>
|
||||
<Tr>
|
||||
<Td style="text-align: left">ZPar</Td>
|
||||
<Td>1ms</Td>
|
||||
<Td>8ms</Td>
|
||||
<Td>850ms</Td>
|
||||
<Td>5x</Td>
|
||||
<Td>8x</Td>
|
||||
<Td>44.7x</Td>
|
||||
</Tr>
|
||||
<Tr>
|
||||
<Td style="text-align: left">NLTK</Td>
|
||||
<Td>4ms</Td>
|
||||
<Td>443ms</Td>
|
||||
<Td><em>n/a</em></Td>
|
||||
<Td>20x</Td>
|
||||
<Td>443x</Td>
|
||||
<Td><em>n/a</em></Td>
|
||||
</Tr>
|
||||
</tbody>
|
||||
</Table>
|
||||
<!-- TODO: update -->
|
||||
|
|
|
@ -166,10 +166,9 @@ $ python setup.py build_ext --inplace # compile spaCy
|
|||
```
|
||||
|
||||
Compared to regular install via pip, the
|
||||
[`requirements.txt`](https://github.com/explosion/spaCy/tree/master/requirements.txt)
|
||||
additionally installs developer dependencies such as Cython. See the
|
||||
[quickstart widget](#quickstart) to get the right commands for your platform and
|
||||
Python version.
|
||||
[`requirements.txt`](%%GITHUB_SPACY/requirements.txt) additionally installs
|
||||
developer dependencies such as Cython. See the [quickstart widget](#quickstart)
|
||||
to get the right commands for your platform and Python version.
|
||||
|
||||
#### Ubuntu {#source-ubuntu}
|
||||
|
||||
|
@ -195,16 +194,14 @@ that matches the version that was used to compile your Python interpreter.
|
|||
|
||||
### Run tests {#run-tests}
|
||||
|
||||
spaCy comes with an
|
||||
[extensive test suite](https://github.com/explosion/spaCy/tree/master/spacy/tests).
|
||||
In order to run the tests, you'll usually want to clone the
|
||||
[repository](https://github.com/explosion/spaCy/tree/master/) and
|
||||
[build spaCy from source](#source). This will also install the required
|
||||
spaCy comes with an [extensive test suite](%%GITHUB_SPACY/spacy/tests). In order
|
||||
to run the tests, you'll usually want to clone the [repository](%%GITHUB_SPACY)
|
||||
and [build spaCy from source](#source). This will also install the required
|
||||
development dependencies and test utilities defined in the `requirements.txt`.
|
||||
|
||||
Alternatively, you can find out where spaCy is installed and run `pytest` on
|
||||
that directory. Don't forget to also install the test utilities via spaCy's
|
||||
[`requirements.txt`](https://github.com/explosion/spaCy/tree/master/requirements.txt):
|
||||
[`requirements.txt`](%%GITHUB_SPACY/requirements.txt):
|
||||
|
||||
```bash
|
||||
$ python -c "import os; import spacy; print(os.path.dirname(spacy.__file__))"
|
||||
|
|
|
@ -28,9 +28,9 @@ A **model architecture** is a function that wires up a
|
|||
neural network that is run internally as part of a component in a spaCy
|
||||
pipeline. To define the actual architecture, you can implement your logic in
|
||||
Thinc directly, or you can use Thinc as a thin wrapper around frameworks such as
|
||||
PyTorch, TensorFlow and MXNet. Each Model can also be used as a sublayer of a
|
||||
PyTorch, TensorFlow and MXNet. Each `Model` can also be used as a sublayer of a
|
||||
larger network, allowing you to freely combine implementations from different
|
||||
frameworks into one `Thinc` Model.
|
||||
frameworks into a single model.
|
||||
|
||||
spaCy's built-in components require a `Model` instance to be passed to them via
|
||||
the config system. To change the model architecture of an existing component,
|
||||
|
@ -264,9 +264,10 @@ larger network. This effectively means that you can easily wrap different
|
|||
components from different frameworks, and "glue" them together with Thinc:
|
||||
|
||||
```python
|
||||
from thinc.api import chain, with_array
|
||||
from thinc.api import chain, with_array, PyTorchWrapper
|
||||
from spacy.ml import CharacterEmbed
|
||||
|
||||
wrapped_pt_model = PyTorchWrapper(torch_model)
|
||||
char_embed = CharacterEmbed(width, embed_size, nM, nC)
|
||||
model = chain(char_embed, with_array(wrapped_pt_model))
|
||||
```
|
||||
|
@ -473,18 +474,17 @@ with Model.define_operators({">>": chain}):
|
|||
|
||||
## Create new trainable components {#components}
|
||||
|
||||
<!-- TODO:
|
||||
<Infobox title="This section is still under construction" emoji="🚧" variant="warning">
|
||||
</Infobox>
|
||||
|
||||
<!-- TODO:
|
||||
- Interaction with `predict`, `get_loss` and `set_annotations`
|
||||
- Initialization life-cycle with `begin_training`, correlation with add_label
|
||||
|
||||
Example: relation extraction component (implemented as project template)
|
||||
|
||||
Avoid duplication with usage/processing-pipelines#trainable-components ?
|
||||
|
||||
-->
|
||||
|
||||
![Diagram of a pipeline component with its model](../images/layers-architectures.svg)
|
||||
<!-- ![Diagram of a pipeline component with its model](../images/layers-architectures.svg)
|
||||
|
||||
```python
|
||||
def update(self, examples):
|
||||
|
@ -498,3 +498,4 @@ def __call__(self, doc):
|
|||
predictions = self.model([doc])
|
||||
self.set_annotations(predictions)
|
||||
```
|
||||
-->
|
||||
|
|
|
@ -854,24 +854,22 @@ The algorithm can be summarized as follows:
|
|||
</Accordion>
|
||||
|
||||
**Global** and **language-specific** tokenizer data is supplied via the language
|
||||
data in
|
||||
[`spacy/lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang). The
|
||||
tokenizer exceptions define special cases like "don't" in English, which needs
|
||||
to be split into two tokens: `{ORTH: "do"}` and `{ORTH: "n't", NORM: "not"}`.
|
||||
The prefixes, suffixes and infixes mostly define punctuation rules – for
|
||||
example, when to split off periods (at the end of a sentence), and when to leave
|
||||
tokens containing periods intact (abbreviations like "U.S.").
|
||||
data in [`spacy/lang`](%%GITHUB_SPACY/spacy/lang). The tokenizer exceptions
|
||||
define special cases like "don't" in English, which needs to be split into two
|
||||
tokens: `{ORTH: "do"}` and `{ORTH: "n't", NORM: "not"}`. The prefixes, suffixes
|
||||
and infixes mostly define punctuation rules – for example, when to split off
|
||||
periods (at the end of a sentence), and when to leave tokens containing periods
|
||||
intact (abbreviations like "U.S.").
|
||||
|
||||
<Accordion title="Should I change the language data or add custom tokenizer rules?" id="lang-data-vs-tokenizer">
|
||||
|
||||
Tokenization rules that are specific to one language, but can be **generalized
|
||||
across that language** should ideally live in the language data in
|
||||
[`spacy/lang`](https://github.com/explosion/spaCy/tree/master/spacy/lang) – we
|
||||
always appreciate pull requests! Anything that's specific to a domain or text
|
||||
type – like financial trading abbreviations, or Bavarian youth slang – should be
|
||||
added as a special case rule to your tokenizer instance. If you're dealing with
|
||||
a lot of customizations, it might make sense to create an entirely custom
|
||||
subclass.
|
||||
[`spacy/lang`](%%GITHUB_SPACY/spacy/lang) – we always appreciate pull requests!
|
||||
Anything that's specific to a domain or text type – like financial trading
|
||||
abbreviations, or Bavarian youth slang – should be added as a special case rule
|
||||
to your tokenizer instance. If you're dealing with a lot of customizations, it
|
||||
might make sense to create an entirely custom subclass.
|
||||
|
||||
</Accordion>
|
||||
|
||||
|
@ -1059,7 +1057,7 @@ but also detailed regular expressions that take the surrounding context into
|
|||
account. For example, there is a regular expression that treats a hyphen between
|
||||
letters as an infix. If you do not want the tokenizer to split on hyphens
|
||||
between letters, you can modify the existing infix definition from
|
||||
[`lang/punctuation.py`](https://github.com/explosion/spaCy/blob/master/spacy/lang/punctuation.py):
|
||||
[`lang/punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py):
|
||||
|
||||
```python
|
||||
### {executable="true"}
|
||||
|
@ -1096,10 +1094,10 @@ print([t.text for t in doc]) # ['mother-in-law']
|
|||
```
|
||||
|
||||
For an overview of the default regular expressions, see
|
||||
[`lang/punctuation.py`](https://github.com/explosion/spaCy/blob/master/spacy/lang/punctuation.py)
|
||||
and language-specific definitions such as
|
||||
[`lang/de/punctuation.py`](https://github.com/explosion/spaCy/blob/master/spacy/lang/de/punctuation.py)
|
||||
for German.
|
||||
[`lang/punctuation.py`](%%GITHUB_SPACY/spacy/lang/punctuation.py) and
|
||||
language-specific definitions such as
|
||||
[`lang/de/punctuation.py`](%%GITHUB_SPACY/spacy/lang/de/punctuation.py) for
|
||||
German.
|
||||
|
||||
### Hooking a custom tokenizer into the pipeline {#custom-tokenizer}
|
||||
|
||||
|
|
|
@ -76,7 +76,7 @@ spaCy also supports pipelines trained on more than one language. This is
|
|||
especially useful for named entity recognition. The language ID used for
|
||||
multi-language or language-neutral pipelines is `xx`. The language class, a
|
||||
generic subclass containing only the base language data, can be found in
|
||||
[`lang/xx`](https://github.com/explosion/spaCy/tree/master/spacy/lang/xx).
|
||||
[`lang/xx`](%%GITHUB_SPACY/spacy/lang/xx).
|
||||
|
||||
To train a pipeline using the neutral multi-language class, you can set
|
||||
`lang = "xx"` in your [training config](/usage/training#config). You can also
|
||||
|
|
|
@ -728,18 +728,21 @@ workflows, but only one can be tracked by DVC.
|
|||
|
||||
</Infobox>
|
||||
|
||||
<Project id="integrations/dvc">
|
||||
<!-- TODO: <Project id="integrations/dvc">
|
||||
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus interdum
|
||||
sodales lectus, ut sodales orci ullamcorper id. Sed condimentum neque ut erat
|
||||
mattis pretium.
|
||||
|
||||
</Project>
|
||||
</Project> -->
|
||||
|
||||
---
|
||||
|
||||
### Prodigy {#prodigy} <IntegrationLogo name="prodigy" width={100} height="auto" align="right" />
|
||||
|
||||
<Infobox title="This section is still under construction" emoji="🚧" variant="warning">
|
||||
|
||||
The Prodigy integration will require a nightly version of Prodigy that supports
|
||||
spaCy v3+.
|
||||
|
||||
</Infobox>
|
||||
|
||||
[Prodigy](https://prodi.gy) is a modern annotation tool for creating training
|
||||
data for machine learning models, developed by us. It integrates with spaCy
|
||||
out-of-the-box and provides many different
|
||||
|
@ -795,9 +798,7 @@ results.
|
|||
|
||||
<Project id="integrations/prodigy">
|
||||
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus interdum
|
||||
sodales lectus, ut sodales orci ullamcorper id. Sed condimentum neque ut erat
|
||||
mattis pretium.
|
||||
<!-- TODO: -->
|
||||
|
||||
</Project>
|
||||
|
||||
|
@ -805,10 +806,6 @@ mattis pretium.
|
|||
|
||||
### Streamlit {#streamlit} <IntegrationLogo name="streamlit" width={150} height="auto" align="right" />
|
||||
|
||||
<Grid cols={2} gutterBottom={false}>
|
||||
|
||||
<div>
|
||||
|
||||
[Streamlit](https://streamlit.io) is a Python framework for building interactive
|
||||
data apps. The [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit)
|
||||
package helps you integrate spaCy visualizations into your Streamlit apps and
|
||||
|
@ -817,16 +814,14 @@ full embedded visualizer, as well as individual components.
|
|||
|
||||
<!-- TODO: update once version is stable -->
|
||||
|
||||
```bash
|
||||
$ pip install "spacy_streamlit>=1.0.0a0"
|
||||
```
|
||||
|
||||
</div>
|
||||
> #### Installation
|
||||
>
|
||||
> ```bash
|
||||
> $ pip install "spacy_streamlit>=1.0.0a0"
|
||||
> ```
|
||||
|
||||
![](../images/spacy-streamlit.png)
|
||||
|
||||
</Grid>
|
||||
|
||||
Using [`spacy-streamlit`](https://github.com/explosion/spacy-streamlit), your
|
||||
projects can easily define their own scripts that spin up an interactive
|
||||
visualizer, using the latest pipeline you trained, or a selection of pipelines
|
||||
|
@ -917,10 +912,43 @@ https://github.com/explosion/projects/blob/v3/integrations/fastapi/scripts/main.
|
|||
|
||||
### Ray {#ray} <IntegrationLogo name="ray" width={100} height="auto" align="right" />
|
||||
|
||||
<Infobox title="This section is still under construction" emoji="🚧" variant="warning">
|
||||
</Infobox>
|
||||
|
||||
<!-- TODO: document -->
|
||||
|
||||
---
|
||||
|
||||
### Weights & Biases {#wandb} <IntegrationLogo name="wandb" width={175} height="auto" align="right" />
|
||||
|
||||
<!-- TODO: link to WandB logger, explain that it's built-in but that you can also do other cool stuff with WandB? And then include example project (still need to decide what we want to do here) -->
|
||||
[Weights & Biases](https://www.wandb.com/) is a popular platform for experiment
|
||||
tracking. spaCy integrates with it out-of-the-box via the
|
||||
[`WandbLogger`](/api/top-level#WandbLogger), which you can add as the
|
||||
`[training.logger]` block of your training [config](/usage/training#config). The
|
||||
results of each step are then logged in your project, together with the full
|
||||
**training config**. This means that _every_ hyperparameter, registered function
|
||||
name and argument will be tracked and you'll be able to see the impact it has on
|
||||
your results.
|
||||
|
||||
> #### Example config
|
||||
>
|
||||
> ```ini
|
||||
> [training.logger]
|
||||
> @loggers = "spacy.WandbLogger.v1"
|
||||
> project_name = "monitor_spacy_training"
|
||||
> remove_config_values = ["paths.train", "paths.dev", "training.dev_corpus.path", "training.train_corpus.path"]
|
||||
> ```
|
||||
|
||||
![Screenshot: Visualized training results](../images/wandb1.jpg)
|
||||
|
||||
![Screenshot: Parameter importance using config values](../images/wandb2.jpg 'Parameter importance using config values')
|
||||
|
||||
<Project id="integrations/wandb">
|
||||
|
||||
Get started with tracking your spaCy training runs in Weights & Biases using our
|
||||
project template. It includes a simple config using the `WandbLogger`, as well
|
||||
as a custom logger implementation you can adjust for your specific use case.
|
||||
|
||||
<!-- TODO: -->
|
||||
|
||||
</Project>
|
||||
|
|
|
@ -192,12 +192,11 @@ of [`Token`](/api/token). This means that all of the attributes that refer to
|
|||
computed properties can't be accessed.
|
||||
|
||||
The uppercase attribute names like `LOWER` or `IS_PUNCT` refer to symbols from
|
||||
the
|
||||
[`spacy.attrs`](https://github.com/explosion/spaCy/tree/master/spacy/attrs.pyx)
|
||||
enum table. They're passed into a function that essentially is a big case/switch
|
||||
statement, to figure out which struct field to return. The same attribute
|
||||
identifiers are used in [`Doc.to_array`](/api/doc#to_array), and a few other
|
||||
places in the code where you need to describe fields like this.
|
||||
the [`spacy.attrs`](%%GITHUB_SPACY/spacy/attrs.pyx) enum table. They're passed
|
||||
into a function that essentially is a big case/switch statement, to figure out
|
||||
which struct field to return. The same attribute identifiers are used in
|
||||
[`Doc.to_array`](/api/doc#to_array), and a few other places in the code where
|
||||
you need to describe fields like this.
|
||||
|
||||
</Accordion>
|
||||
|
||||
|
|
|
@ -187,11 +187,11 @@ add to that data and saves and loads the data to and from a JSON file.
|
|||
>
|
||||
> To see custom serialization methods in action, check out the new
|
||||
> [`EntityRuler`](/api/entityruler) component and its
|
||||
> [source](https://github.com/explosion/spaCy/tree/master/spacy/pipeline/entityruler.py).
|
||||
> Patterns added to the component will be saved to a `.jsonl` file if the
|
||||
> pipeline is serialized to disk, and to a bytestring if the pipeline is
|
||||
> serialized to bytes. This allows saving out a pipeline with a rule-based
|
||||
> entity recognizer and including all rules _with_ the component data.
|
||||
> [source](%%GITHUB_SPACY/spacy/pipeline/entityruler.py). Patterns added to the
|
||||
> component will be saved to a `.jsonl` file if the pipeline is serialized to
|
||||
> disk, and to a bytestring if the pipeline is serialized to bytes. This allows
|
||||
> saving out a pipeline with a rule-based entity recognizer and including all
|
||||
> rules _with_ the component data.
|
||||
|
||||
```python
|
||||
### {highlight="14-18,20-25"}
|
||||
|
|
|
@ -494,7 +494,7 @@ regressions to the parts of the library that you care about the most.
|
|||
|
||||
**For more details on the types of contributions we're looking for, the code
|
||||
conventions and other useful tips, make sure to check out the
|
||||
[contributing guidelines](https://github.com/explosion/spaCy/tree/master/CONTRIBUTING.md).**
|
||||
[contributing guidelines](%%GITHUB_SPACY/CONTRIBUTING.md).**
|
||||
|
||||
<Infobox title="Code of Conduct" variant="warning">
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ specific use case. It's also available in spaCy as the
|
|||
|
||||
import QuickstartTraining from 'widgets/quickstart-training.js'
|
||||
|
||||
<QuickstartTraining download="base_config.cfg" />
|
||||
<QuickstartTraining />
|
||||
|
||||
After you've saved the starter config to a file `base_config.cfg`, you can use
|
||||
the [`init fill-config`](/api/cli#init-fill-config) command to fill in the
|
||||
|
@ -127,7 +127,7 @@ Some of the main advantages and features of spaCy's training config are:
|
|||
config which types of data to expect.
|
||||
|
||||
```ini
|
||||
https://github.com/explosion/spaCy/blob/develop/spacy/default_config.cfg
|
||||
%%GITHUB_SPACY/spacy/default_config.cfg
|
||||
```
|
||||
|
||||
Under the hood, the config is parsed into a dictionary. It's divided into
|
||||
|
|
|
@ -76,9 +76,7 @@ noise contrastive estimation or reinforcement learning.
|
|||
## New features {#features}
|
||||
|
||||
This section contains an overview of the most important **new features and
|
||||
improvements**. The [API docs](/api) include additional deprecation notes. New
|
||||
methods and functions that were introduced in this version are marked with the
|
||||
tag <Tag variant="new">2</Tag>.
|
||||
improvements**. The [API docs](/api) include additional deprecation notes.
|
||||
|
||||
### Convolutional neural network models {#features-models}
|
||||
|
||||
|
|
|
@ -8,20 +8,30 @@ menu:
|
|||
- ['Migrating from v2.x', 'migrating']
|
||||
---
|
||||
|
||||
## Summary {#summary}
|
||||
## Summary {#summary hidden="true"}
|
||||
|
||||
<Grid cols={2}>
|
||||
<Grid cols={2} gutterBottom={false}>
|
||||
|
||||
<div>
|
||||
|
||||
spaCy v3.0 features all new **transformer-based pipelines** that bring spaCy's
|
||||
accuracy right up to the current **state-of-the-art**. You can use any
|
||||
pretrained transformer to train your own pipelines, and even share one
|
||||
transformer between multiple components with **multi-task learning**. Training
|
||||
is now fully configurable and extensible, and you can define your own custom
|
||||
models using **PyTorch**, **TensorFlow** and other frameworks. The new spaCy
|
||||
projects system lets you describe whole **end-to-end workflows** in a single
|
||||
file, giving you an easy path from prototype to production, and making it easy
|
||||
to clone and adapt best-practice projects for your own use cases.
|
||||
|
||||
</div>
|
||||
|
||||
<Infobox title="Table of Contents" id="toc">
|
||||
|
||||
- [Summary](#summary)
|
||||
- [New features](#features)
|
||||
- [Training & config system](#features-training)
|
||||
- [Transformer-based pipelines](#features-transformers)
|
||||
- [Training & config system](#features-training)
|
||||
- [Custom models](#features-custom-models)
|
||||
- [End-to-end project workflows](#features-projects)
|
||||
- [New built-in components](#features-pipeline-components)
|
||||
|
@ -39,47 +49,126 @@ menu:
|
|||
|
||||
## New Features {#features}
|
||||
|
||||
### New training workflow and config system {#features-training}
|
||||
|
||||
<Infobox title="Details & Documentation" emoji="📖" list>
|
||||
|
||||
- **Usage:** [Training pipelines and models](/usage/training)
|
||||
- **Thinc:** [Thinc's config system](https://thinc.ai/docs/usage-config),
|
||||
[`Config`](https://thinc.ai/docs/api-config#config)
|
||||
- **CLI:** [`train`](/api/cli#train), [`pretrain`](/api/cli#pretrain),
|
||||
[`evaluate`](/api/cli#evaluate)
|
||||
- **API:** [Config format](/api/data-formats#config),
|
||||
[`registry`](/api/top-level#registry)
|
||||
|
||||
</Infobox>
|
||||
This section contains an overview of the most important **new features and
|
||||
improvements**. The [API docs](/api) include additional deprecation notes. New
|
||||
methods and functions that were introduced in this version are marked with the
|
||||
tag <Tag variant="new">3</Tag>.
|
||||
|
||||
### Transformer-based pipelines {#features-transformers}
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```cli
|
||||
> $ python -m spacy download en_core_web_trf
|
||||
> ```
|
||||
|
||||
spaCy v3.0 features all new transformer-based pipelines that bring spaCy's
|
||||
accuracy right up to the current **state-of-the-art**. You can use any
|
||||
pretrained transformer to train your own pipelines, and even share one
|
||||
transformer between multiple components with **multi-task learning**. spaCy's
|
||||
transformer support interoperates with [PyTorch](https://pytorch.org) and the
|
||||
[HuggingFace `transformers`](https://huggingface.co/transformers/) library,
|
||||
giving you access to thousands of pretrained models for your pipelines.
|
||||
|
||||
![Pipeline components listening to shared embedding component](../images/tok2vec-listener.svg)
|
||||
|
||||
import Benchmarks from 'usage/\_benchmarks-models.md'
|
||||
|
||||
<Benchmarks />
|
||||
|
||||
<Infobox title="Details & Documentation" emoji="📖" list>
|
||||
|
||||
- **Usage:** [Embeddings & Transformers](/usage/embeddings-transformers),
|
||||
[Training pipelines and models](/usage/training)
|
||||
[Training pipelines and models](/usage/training),
|
||||
[Benchmarks](/usage/facts-figures#benchmarks)
|
||||
- **API:** [`Transformer`](/api/transformer),
|
||||
[`TransformerData`](/api/transformer#transformerdata),
|
||||
[`FullTransformerBatch`](/api/transformer#fulltransformerbatch)
|
||||
- **Architectures:** [TransformerModel](/api/architectures#TransformerModel),
|
||||
[TransformerListener](/api/architectures#TransformerListener),
|
||||
[Tok2VecTransformer](/api/architectures#Tok2VecTransformer)
|
||||
- **Trained Pipelines:** [`en_core_trf_lg_sm`](/models/en)
|
||||
- **Trained Pipelines:** [`en_core_web_trf`](/models/en#en_core_web_trf)
|
||||
- **Implementation:**
|
||||
[`spacy-transformers`](https://github.com/explosion/spacy-transformers)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### New training workflow and config system {#features-training}
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```ini
|
||||
> [training]
|
||||
> vectors = null
|
||||
> accumulate_gradient = 3
|
||||
>
|
||||
> [training.optimizer]
|
||||
> @optimizers = "Adam.v1"
|
||||
>
|
||||
> [training.optimizer.learn_rate]
|
||||
> @schedules = "warmup_linear.v1"
|
||||
> warmup_steps = 250
|
||||
> total_steps = 20000
|
||||
> initial_rate = 0.01
|
||||
> ```
|
||||
|
||||
spaCy v3.0 introduces a comprehensive and extensible system for **configuring
|
||||
your training runs**. A single configuration file describes every detail of your
|
||||
training run, with no hidden defaults, making it easy to rerun your experiments
|
||||
and track changes. You can use the
|
||||
[quickstart widget](/usage/training#quickstart) or the `init config` command to
|
||||
get started. Instead of providing lots of arguments on the command line, you
|
||||
only need to pass your `config.cfg` file to `spacy train`.
|
||||
|
||||
Training config files include all **settings and hyperparameters** for training
|
||||
your pipeline. Some settings can also be registered **functions** that you can
|
||||
swap out and customize, making it easy to implement your own custom models and
|
||||
architectures.
|
||||
|
||||
<Infobox title="Details & Documentation" emoji="📖" list>
|
||||
|
||||
- **Usage:** [Training pipelines and models](/usage/training)
|
||||
- **Thinc:** [Thinc's config system](https://thinc.ai/docs/usage-config),
|
||||
[`Config`](https://thinc.ai/docs/api-config#config)
|
||||
- **CLI:** [`init config`](/api/cli#init-config),
|
||||
[`init fill-config`](/api/cli#init-fill-config), [`train`](/api/cli#train),
|
||||
[`pretrain`](/api/cli#pretrain), [`evaluate`](/api/cli#evaluate)
|
||||
- **API:** [Config format](/api/data-formats#config),
|
||||
[`registry`](/api/top-level#registry)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Custom models using any framework {#features-custom-models}
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> from torch import nn
|
||||
> from thinc.api import PyTorchWrapper
|
||||
>
|
||||
> torch_model = nn.Sequential(
|
||||
> nn.Linear(32, 32),
|
||||
> nn.ReLU(),
|
||||
> nn.Softmax(dim=1)
|
||||
> )
|
||||
> model = PyTorchWrapper(torch_model)
|
||||
> ```
|
||||
|
||||
spaCy's new configuration system makes it easy to customize the neural network
|
||||
models used by the different pipeline components. You can also implement your
|
||||
own architectures via spaCy's machine learning library [Thinc](https://thinc.ai)
|
||||
that provides various layers and utilities, as well as thin wrappers around
|
||||
frameworks like **PyTorch**, **TensorFlow** and **MXNet**. Component models all
|
||||
follow the same unified [`Model`](https://thinc.ai/docs/api-model) API and each
|
||||
`Model` can also be used as a sublayer of a larger network, allowing you to
|
||||
freely combine implementations from different frameworks into a single model.
|
||||
|
||||
<Infobox title="Details & Documentation" emoji="📖" list>
|
||||
|
||||
- **Usage:** [Layers and architectures](/usage/layers-architectures)
|
||||
- **Thinc:**
|
||||
[Wrapping PyTorch, TensorFlow & MXNet](https://thinc.ai/docs/usage-frameworks)
|
||||
[Wrapping PyTorch, TensorFlow & MXNet](https://thinc.ai/docs/usage-frameworks),
|
||||
[`Model` API](https://thinc.ai/docs/api-model)
|
||||
- **API:** [Model architectures](/api/architectures), [`Pipe`](/api/pipe)
|
||||
|
||||
</Infobox>
|
||||
|
@ -159,8 +248,7 @@ add to your pipeline and customize for your use case:
|
|||
|
||||
- **Usage:** [Processing pipelines](/usage/processing-pipelines)
|
||||
- **API:** [Built-in pipeline components](/api#architecture-pipeline)
|
||||
- **Implementation:**
|
||||
[`spacy/pipeline`](https://github.com/explosion/spaCy/tree/develop/spacy/pipeline)
|
||||
- **Implementation:** [`spacy/pipeline`](%%GITHUB_SPACY/spacy/pipeline)
|
||||
|
||||
</Infobox>
|
||||
|
||||
|
@ -197,15 +285,12 @@ aren't set.
|
|||
[`@Language.factory`](/api/language#factory),
|
||||
[`Language.add_pipe`](/api/language#add_pipe),
|
||||
[`Language.analyze_pipes`](/api/language#analyze_pipes)
|
||||
- **Implementation:**
|
||||
[`spacy/language.py`](https://github.com/explosion/spaCy/tree/develop/spacy/language.py)
|
||||
- **Implementation:** [`spacy/language.py`](%%GITHUB_SPACY/spacy/language.py)
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Dependency matching {#features-dep-matcher}
|
||||
|
||||
<!-- TODO: improve summary -->
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
|
@ -233,7 +318,7 @@ dictionaries**, with each dictionary describing a **token to match** and its
|
|||
[Dependency matching](/usage/rule-based-matching#dependencymatcher)
|
||||
- **API:** [`DependencyMatcher`](/api/dependencymatcher)
|
||||
- **Implementation:**
|
||||
[`spacy/matcher/dependencymatcher.pyx`](https://github.com/explosion/spaCy/tree/develop/spacy/matcher/dependencymatcher.pyx)
|
||||
[`spacy/matcher/dependencymatcher.pyx`](%%GITHUB_SPACY/spacy/matcher/dependencymatcher.pyx)
|
||||
|
||||
</Infobox>
|
||||
|
||||
|
@ -404,11 +489,12 @@ Note that spaCy v3.0 now requires **Python 3.6+**.
|
|||
[`Pipe.begin_training`](/api/pipe#begin_training) now take a function that
|
||||
returns a sequence of `Example` objects to initialize the model instead of a
|
||||
list of tuples.
|
||||
- [`Matcher.add`](/api/matcher#add),
|
||||
[`PhraseMatcher.add`](/api/phrasematcher#add) and
|
||||
[`DependencyMatcher.add`](/api/dependencymatcher#add) now only accept a list
|
||||
of patterns as the second argument (instead of a variable number of
|
||||
arguments). The `on_match` callback becomes an optional keyword argument.
|
||||
- [`Matcher.add`](/api/matcher#add) and
|
||||
[`PhraseMatcher.add`](/api/phrasematcher#add) now only accept a list of
|
||||
patterns as the second argument (instead of a variable number of arguments).
|
||||
The `on_match` callback becomes an optional keyword argument.
|
||||
- The `spacy.gold` module has been renamed to
|
||||
[`spacy.training`](%%GITHUB_SPACY/spacy/training).
|
||||
- The `PRON_LEMMA` symbol and `-PRON-` as an indicator for pronoun lemmas have
|
||||
been removed.
|
||||
- The `TAG_MAP` and `MORPH_RULES` in the language data have been replaced by the
|
||||
|
@ -779,6 +865,20 @@ python -m spacy package ./output ./packages
|
|||
- python setup.py sdist
|
||||
```
|
||||
|
||||
#### Data utilities and gold module {#migrating-gold}
|
||||
|
||||
The `spacy.gold` module has been renamed to `spacy.training`. This mostly
|
||||
affects internals, but if you've been using the span offset conversion utilities
|
||||
[`biluo_tags_from_offsets`](/api/top-level#biluo_tags_from_offsets),
|
||||
[`offsets_from_biluo_tags`](/api/top-level#offsets_from_biluo_tags) or
|
||||
[`spans_from_biluo_tags`](/api/top-level#spans_from_biluo_tags), you'll have to
|
||||
change your imports:
|
||||
|
||||
```diff
|
||||
- from spacy.gold import biluo_tags_from_offsets, spans_from_biluo_tags
|
||||
+ from spacy.training import biluo_tags_from_offsets, spans_from_biluo_tags
|
||||
```
|
||||
|
||||
#### Migration notes for plugin maintainers {#migrating-plugins}
|
||||
|
||||
Thanks to everyone who's been contributing to the spaCy ecosystem by developing
|
||||
|
|
|
@ -8,7 +8,6 @@ const codeBlocksPlugin = require('./src/plugins/remark-code-blocks.js')
|
|||
|
||||
// Import metadata
|
||||
const site = require('./meta/site.json')
|
||||
const logos = require('./meta/logos.json')
|
||||
const sidebars = require('./meta/sidebars.json')
|
||||
const models = require('./meta/languages.json')
|
||||
const universe = require('./meta/universe.json')
|
||||
|
@ -20,11 +19,16 @@ const favicon = isNightly ? `src/images/icon_nightly.png` : `src/images/icon.png
|
|||
const binderBranch = isNightly ? 'nightly' : site.binderBranch
|
||||
const siteUrl = isNightly ? site.siteUrlNightly : site.siteUrl
|
||||
const domain = isNightly ? site.domainNightly : site.domain
|
||||
const branch = isNightly ? 'develop' : 'master'
|
||||
|
||||
// These variables are going to be replaced in the Markdown, e.g. %%GITHUB_SPACY
|
||||
const replacements = {
|
||||
GITHUB_SPACY: `https://github.com/explosion/spaCy/tree/${branch}`,
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
siteMetadata: {
|
||||
...site,
|
||||
...logos,
|
||||
sidebars,
|
||||
...models,
|
||||
universe,
|
||||
|
@ -121,6 +125,13 @@ module.exports = {
|
|||
{
|
||||
resolve: `gatsby-remark-copy-linked-files`,
|
||||
},
|
||||
{
|
||||
resolve: 'gatsby-remark-find-replace',
|
||||
options: {
|
||||
replacements,
|
||||
prefix: '%%',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
|
|
|
@ -1,37 +0,0 @@
|
|||
{
|
||||
"logosUsers": [
|
||||
{ "id": "airbnb", "url": "https://www.airbnb.com" },
|
||||
{ "id": "uber", "url": "https://www.uber.com" },
|
||||
{ "id": "quora", "url": "https://www.quora.com" },
|
||||
{ "id": "retriever", "url": "https://www.retriever.no" },
|
||||
{ "id": "stitchfix", "url": "https://www.stitchfix.com" },
|
||||
{ "id": "chartbeat", "url": "https://chartbeat.com" },
|
||||
{ "id": "allenai", "url": "https://allenai.org" }
|
||||
],
|
||||
"logosPublications": [
|
||||
{
|
||||
"id": "recode",
|
||||
"url": "https://www.recode.net/2017/6/22/15855492/ai-artificial-intelligence-nonprofit-good-human-chatbots-machine-learning"
|
||||
},
|
||||
{
|
||||
"id": "wapo",
|
||||
"url": "https://www.washingtonpost.com/news/wonk/wp/2016/05/18/googles-new-artificial-intelligence-cant-understand-these-sentences-can-you/"
|
||||
},
|
||||
{
|
||||
"id": "bbc",
|
||||
"url": "http://www.bbc.co.uk/rd/blog/2017-08-irfs-weeknotes-number-250"
|
||||
},
|
||||
{
|
||||
"id": "microsoft",
|
||||
"url": "https://www.microsoft.com/developerblog/2016/09/13/training-a-classifier-for-relation-extraction-from-medical-literature/"
|
||||
},
|
||||
{
|
||||
"id": "venturebeat",
|
||||
"url": "https://venturebeat.com/2017/01/27/4-ai-startups-that-analyze-customer-reviews/"
|
||||
},
|
||||
{
|
||||
"id": "thoughtworks",
|
||||
"url": "https://www.thoughtworks.com/radar/tools"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -28,7 +28,7 @@
|
|||
},
|
||||
"binderUrl": "explosion/spacy-io-binder",
|
||||
"binderBranch": "live",
|
||||
"binderVersion": "2.3.0",
|
||||
"binderVersion": "3.0.0",
|
||||
"sections": [
|
||||
{ "id": "usage", "title": "Usage Documentation", "theme": "blue" },
|
||||
{ "id": "models", "title": "Models Documentation", "theme": "blue" },
|
||||
|
@ -47,20 +47,19 @@
|
|||
"items": [
|
||||
{ "text": "Usage", "url": "/usage" },
|
||||
{ "text": "Models", "url": "/models" },
|
||||
{ "text": "API", "url": "/api" },
|
||||
{ "text": "Universe", "url": "/universe" }
|
||||
{ "text": "API Reference", "url": "/api" },
|
||||
{ "text": "Online Course", "url": "https://course.spacy.io" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"label": "Support",
|
||||
"label": "Community",
|
||||
"items": [
|
||||
{ "text": "Universe", "url": "/universe" },
|
||||
{ "text": "Issue Tracker", "url": "https://github.com/explosion/spaCy/issues" },
|
||||
{
|
||||
"text": "Stack Overflow",
|
||||
"url": "http://stackoverflow.com/questions/tagged/spacy"
|
||||
},
|
||||
{ "text": "Reddit User Group", "url": "https://www.reddit.com/r/spacynlp/" },
|
||||
{ "text": "Gitter Chat", "url": "https://gitter.im/explosion/spaCy" }
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
48
website/package-lock.json
generated
|
@ -14238,6 +14238,46 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"gatsby-remark-find-replace": {
|
||||
"version": "0.3.0",
|
||||
"resolved": "https://registry.npmjs.org/gatsby-remark-find-replace/-/gatsby-remark-find-replace-0.3.0.tgz",
|
||||
"integrity": "sha512-tTXt+ZxD+7hEVtZVbZVrifcQUk2mt4uJNUHhc9cje+93sDa4PrrFBbny9IWgXLj9QH9xDxWOZrI768ApMtbPUQ==",
|
||||
"requires": {
|
||||
"escape-string-regexp": "^2.0.0",
|
||||
"unist-util-visit": "^2.0.1"
|
||||
},
|
||||
"dependencies": {
|
||||
"escape-string-regexp": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz",
|
||||
"integrity": "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w=="
|
||||
},
|
||||
"unist-util-is": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-4.0.2.tgz",
|
||||
"integrity": "sha512-Ofx8uf6haexJwI1gxWMGg6I/dLnF2yE+KibhD3/diOqY2TinLcqHXCV6OI5gFVn3xQqDH+u0M625pfKwIwgBKQ=="
|
||||
},
|
||||
"unist-util-visit": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-2.0.3.tgz",
|
||||
"integrity": "sha512-iJ4/RczbJMkD0712mGktuGpm/U4By4FfDonL7N/9tATGIF4imikjOuagyMY53tnZq3NP6BcmlrHhEKAfGWjh7Q==",
|
||||
"requires": {
|
||||
"@types/unist": "^2.0.0",
|
||||
"unist-util-is": "^4.0.0",
|
||||
"unist-util-visit-parents": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"unist-util-visit-parents": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-3.1.0.tgz",
|
||||
"integrity": "sha512-0g4wbluTF93npyPrp/ymd3tCDTMnP0yo2akFD2FIBAYXq/Sga3lwaU1D8OYKbtpioaI6CkDcQ6fsMnmtzt7htw==",
|
||||
"requires": {
|
||||
"@types/unist": "^2.0.0",
|
||||
"unist-util-is": "^4.0.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"gatsby-remark-images": {
|
||||
"version": "3.0.4",
|
||||
"resolved": "https://registry.npmjs.org/gatsby-remark-images/-/gatsby-remark-images-3.0.4.tgz",
|
||||
|
@ -22152,6 +22192,14 @@
|
|||
"clipboard": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"prismjs-bibtex": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/prismjs-bibtex/-/prismjs-bibtex-1.1.0.tgz",
|
||||
"integrity": "sha512-IjZUJP3iTkV1DZ8qcjUF7p7Ji/LPns56jw+INUBPtnBaX4Q/VhtzlRGHM0lxSvdfqUvqgTGF3oM8aogWqzZz2g==",
|
||||
"requires": {
|
||||
"prismjs": "^1.15"
|
||||
}
|
||||
},
|
||||
"private": {
|
||||
"version": "0.1.8",
|
||||
"resolved": "https://registry.npmjs.org/private/-/private-0.1.8.tgz",
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
"gatsby-plugin-sitemap": "^2.0.5",
|
||||
"gatsby-plugin-svgr": "^2.0.1",
|
||||
"gatsby-remark-copy-linked-files": "^2.0.9",
|
||||
"gatsby-remark-find-replace": "^0.3.0",
|
||||
"gatsby-remark-images": "^3.0.4",
|
||||
"gatsby-remark-prismjs": "^3.2.4",
|
||||
"gatsby-remark-smartypants": "^2.0.8",
|
||||
|
@ -44,6 +45,7 @@
|
|||
"node-sass": "^4.11.0",
|
||||
"parse-numeric-range": "0.0.2",
|
||||
"prismjs": "^1.15.0",
|
||||
"prismjs-bibtex": "^1.1.0",
|
||||
"prop-types": "^15.7.2",
|
||||
"react": "^16.8.2",
|
||||
"react-dom": "^16.8.2",
|
||||
|
|
|
@ -2,6 +2,7 @@ import React, { Fragment } from 'react'
|
|||
import PropTypes from 'prop-types'
|
||||
import classNames from 'classnames'
|
||||
import highlightCode from 'gatsby-remark-prismjs/highlight-code.js'
|
||||
import 'prismjs-bibtex'
|
||||
import rangeParser from 'parse-numeric-range'
|
||||
import { StaticQuery, graphql } from 'gatsby'
|
||||
import { window } from 'browser-monads'
|
||||
|
|
|
@ -9,6 +9,7 @@ export default function Grid({
|
|||
narrow = false,
|
||||
gutterBottom = true,
|
||||
className,
|
||||
style,
|
||||
children,
|
||||
}) {
|
||||
const gridClassNames = classNames(classes.root, className, {
|
||||
|
@ -18,7 +19,11 @@ export default function Grid({
|
|||
[classes.third]: cols === 3,
|
||||
[classes.quarter]: cols === 4,
|
||||
})
|
||||
return <div className={gridClassNames}>{children}</div>
|
||||
return (
|
||||
<div className={gridClassNames} style={style}>
|
||||
{children}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
Grid.propTypes = {
|
||||
|
|
|
@ -23,7 +23,7 @@ export default function Infobox({
|
|||
<aside className={infoboxClassNames} id={id}>
|
||||
{title && (
|
||||
<h4 className={classes.title}>
|
||||
{variant !== 'default' && (
|
||||
{variant !== 'default' && !emoji && (
|
||||
<Icon width={18} name={variant} inline className={classes.icon} />
|
||||
)}
|
||||
<span className={classes.titleText}>
|
||||
|
|
|
@ -1,19 +1,17 @@
|
|||
import React, { Fragment } from 'react'
|
||||
import React from 'react'
|
||||
import classNames from 'classnames'
|
||||
|
||||
import pattern from '../images/pattern_blue.jpg'
|
||||
import patternNightly from '../images/pattern_nightly.jpg'
|
||||
import patternOverlay from '../images/pattern_landing.jpg'
|
||||
import patternOverlayNightly from '../images/pattern_landing_nightly.jpg'
|
||||
import logoSvgs from '../images/logos'
|
||||
|
||||
import Grid from './grid'
|
||||
import { Content } from './main'
|
||||
import Button from './button'
|
||||
import CodeBlock from './code'
|
||||
import { H1, H2, H3, Label, InlineList } from './typography'
|
||||
import { H1, H2, H3 } from './typography'
|
||||
import Link from './link'
|
||||
import { chunkArray } from './util'
|
||||
import classes from '../styles/landing.module.sass'
|
||||
|
||||
export const LandingHeader = ({ nightly, style = {}, children }) => {
|
||||
|
@ -39,9 +37,9 @@ export const LandingSubtitle = ({ children }) => (
|
|||
</h2>
|
||||
)
|
||||
|
||||
export const LandingGrid = ({ cols = 3, blocks = false, children }) => (
|
||||
export const LandingGrid = ({ cols = 3, blocks = false, style, children }) => (
|
||||
<Content className={classNames(classes.grid, { [classes.blocks]: blocks })}>
|
||||
<Grid cols={cols} narrow={blocks}>
|
||||
<Grid cols={cols} narrow={blocks} style={style}>
|
||||
{children}
|
||||
</Grid>
|
||||
</Content>
|
||||
|
@ -142,33 +140,3 @@ export const LandingBannerButton = ({ to, small, children }) => (
|
|||
</Button>
|
||||
</div>
|
||||
)
|
||||
|
||||
export const LandingLogos = ({ logos = [], title, maxRow = 4, children }) => {
|
||||
const rows = chunkArray(logos, maxRow)
|
||||
return (
|
||||
<Content className={classes.logos}>
|
||||
{title && <Label>{title}</Label>}
|
||||
{rows.map((logos, i) => (
|
||||
<Fragment key={i}>
|
||||
<InlineList className={classes.logosContent}>
|
||||
{logos.map(({ id, url }, j) => {
|
||||
const Component = logoSvgs[id]
|
||||
return !Component ? null : (
|
||||
<Link
|
||||
to={url}
|
||||
key={j}
|
||||
aria-label={id}
|
||||
hidden
|
||||
className={classes.logo}
|
||||
>
|
||||
<Component />
|
||||
</Link>
|
||||
)
|
||||
})}
|
||||
{i === rows.length - 1 && children}
|
||||
</InlineList>
|
||||
</Fragment>
|
||||
))}
|
||||
</Content>
|
||||
)
|
||||
}
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import React from 'react'
|
||||
import React, { Fragment } from 'react'
|
||||
import classNames from 'classnames'
|
||||
|
||||
import Icon from './icon'
|
||||
import { Help } from './typography'
|
||||
import { isString } from './util'
|
||||
import classes from '../styles/table.module.sass'
|
||||
|
||||
|
@ -16,14 +17,26 @@ function getCellContent(cellChildren) {
|
|||
'✅': { name: 'yes', variant: 'success', 'aria-label': 'positive' },
|
||||
'❌': { name: 'no', variant: 'error', 'aria-label': 'negative' },
|
||||
}
|
||||
const iconRe = new RegExp(`^(${Object.keys(icons).join('|')})`, 'g')
|
||||
let children = isString(cellChildren) ? [cellChildren] : cellChildren
|
||||
if (Array.isArray(children)) {
|
||||
return children.map((child, i) => {
|
||||
if (isString(child)) {
|
||||
const icon = icons[child.trim()]
|
||||
const props = {
|
||||
inline: i < children.length,
|
||||
'aria-hidden': undefined,
|
||||
}
|
||||
if (icon) {
|
||||
const props = { ...icon, inline: i < children.length, 'aria-hidden': undefined }
|
||||
return <Icon {...props} key={i} />
|
||||
return <Icon {...icon} {...props} key={i} />
|
||||
} else if (iconRe.test(child)) {
|
||||
const [, iconName, text] = child.split(iconRe)
|
||||
return (
|
||||
<Fragment key={i}>
|
||||
<Icon {...icons[iconName]} {...props} />
|
||||
{text.trim()}
|
||||
</Fragment>
|
||||
)
|
||||
}
|
||||
// Work around prettier auto-escape
|
||||
if (child.startsWith('\\')) return child.slice(1)
|
||||
|
@ -66,7 +79,22 @@ export const Table = ({ fixed, className, ...props }) => {
|
|||
return <table className={tableClassNames} {...props} />
|
||||
}
|
||||
|
||||
export const Th = props => <th className={classes.th} {...props} />
|
||||
export const Th = ({ children, ...props }) => {
|
||||
const isRotated = children && !isString(children) && children.type && children.type.name == 'Tx'
|
||||
const thClassNames = classNames(classes.th, { [classes.thRotated]: isRotated })
|
||||
return (
|
||||
<th className={thClassNames} {...props}>
|
||||
{children}
|
||||
</th>
|
||||
)
|
||||
}
|
||||
|
||||
// Rotated head, child of Th
|
||||
export const Tx = ({ children, ...props }) => (
|
||||
<div className={classes.tx} {...props}>
|
||||
<span>{children}</span>
|
||||
</div>
|
||||
)
|
||||
|
||||
export const Tr = ({ evenodd = true, children, ...props }) => {
|
||||
const foot = isFootRow(children)
|
||||
|
|
|
@ -64,8 +64,8 @@ export const InlineList = ({ Component = 'p', gutterBottom = true, className, ch
|
|||
return <Component className={listClassNames}>{children}</Component>
|
||||
}
|
||||
|
||||
export const Help = ({ children, size = 16 }) => (
|
||||
<span className={classes.help} data-tooltip={children}>
|
||||
export const Help = ({ children, className, size = 16 }) => (
|
||||
<span className={classNames(classes.help, className)} data-tooltip={children}>
|
||||
<Icon name="help2" width={size} />
|
||||
</span>
|
||||
)
|
||||
|
|
|
@ -6,6 +6,8 @@ import siteMetadata from '../../meta/site.json'
|
|||
|
||||
const htmlToReactParser = new HtmlToReactParser()
|
||||
|
||||
// TODO: update this
|
||||
const DEFAULT_BRANCH = 'develop'
|
||||
export const repo = siteMetadata.repo
|
||||
export const modelsRepo = siteMetadata.modelsRepo
|
||||
|
||||
|
@ -21,7 +23,7 @@ export const headingTextClassName = 'heading-text'
|
|||
* @param {string} [branch] - Optional branch. Defaults to master.
|
||||
* @returns {string} - URL to the file on GitHub.
|
||||
*/
|
||||
export function github(filepath, branch = 'master') {
|
||||
export function github(filepath, branch = DEFAULT_BRANCH) {
|
||||
if (filepath && filepath.startsWith('github.com')) return `https://${filepath}`
|
||||
const path = filepath ? '/tree/' + (branch || 'master') + '/' + filepath : ''
|
||||
return `https://github.com/${repo}${path}`
|
||||
|
@ -33,7 +35,7 @@ export function github(filepath, branch = 'master') {
|
|||
* @param {boolean} [isIndex] - Whether the page is an index, e.g. /api/index.md
|
||||
* @param {string} [branch] - Optional branch on GitHub. Defaults to master.
|
||||
*/
|
||||
export function getCurrentSource(slug, isIndex = false, branch = 'master') {
|
||||
export function getCurrentSource(slug, isIndex = false, branch = DEFAULT_BRANCH) {
|
||||
const ext = isIndex ? '/index.md' : '.md'
|
||||
return github(`website/docs${slug}${ext}`, branch)
|
||||
}
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="150" height="45" viewBox="0 0 320 100">
|
||||
<path fill="#FF5A5F" d="M168.7 25.1c0 3.6-2.9 6.5-6.5 6.5s-6.5-2.9-6.5-6.5 2.8-6.5 6.5-6.5c3.7.1 6.5 3 6.5 6.5zm-26.8 13.1v1.6s-3.1-4-9.7-4c-10.9 0-19.4 8.3-19.4 19.8 0 11.4 8.4 19.8 19.4 19.8 6.7 0 9.7-4.1 9.7-4.1V73c0 .8.6 1.4 1.4 1.4h8.1V36.8h-8.1c-.8 0-1.4.7-1.4 1.4zm0 24.1c-1.5 2.2-4.5 4.1-8.1 4.1-6.4 0-11.3-4-11.3-10.8s4.9-10.8 11.3-10.8c3.5 0 6.7 2 8.1 4.1v13.4zm15.5-25.5h9.6v37.6h-9.6V36.8zm143.4-1c-6.6 0-9.7 4-9.7 4V18.7h-9.6v55.7h8.1c.8 0 1.4-.7 1.4-1.4v-1.7s3.1 4.1 9.7 4.1c10.9 0 19.4-8.4 19.4-19.8s-8.5-19.8-19.3-19.8zm-1.6 30.5c-3.7 0-6.6-1.9-8.1-4.1V48.8c1.5-2 4.7-4.1 8.1-4.1 6.4 0 11.3 4 11.3 10.8s-4.9 10.8-11.3 10.8zm-22.7-14.2v22.4h-9.6V53.2c0-6.2-2-8.7-7.4-8.7-2.9 0-5.9 1.5-7.8 3.7v26.2h-9.6V36.8h7.6c.8 0 1.4.7 1.4 1.4v1.6c2.8-2.9 6.5-4 10.2-4 4.2 0 7.7 1.2 10.5 3.6 3.4 2.8 4.7 6.4 4.7 12.7zm-57.7-16.3c-6.6 0-9.7 4-9.7 4V18.7h-9.6v55.7h8.1c.8 0 1.4-.7 1.4-1.4v-1.7s3.1 4.1 9.7 4.1c10.9 0 19.4-8.4 19.4-19.8.1-11.4-8.4-19.8-19.3-19.8zm-1.6 30.5c-3.7 0-6.6-1.9-8.1-4.1V48.8c1.5-2 4.7-4.1 8.1-4.1 6.4 0 11.3 4 11.3 10.8s-4.9 10.8-11.3 10.8zm-26-30.5c2.9 0 4.4.5 4.4.5v8.9s-8-2.7-13 3v26.3H173V36.8h8.1c.8 0 1.4.7 1.4 1.4v1.6c1.8-2.1 5.7-4 8.7-4zM91.5 71c-.5-1.2-1-2.5-1.5-3.6-.8-1.8-1.6-3.5-2.3-5.1l-.1-.1C80.7 47.2 73.3 32 65.5 17l-.3-.6c-.8-1.5-1.6-3.1-2.4-4.7-1-1.8-2-3.7-3.6-5.5C56 2.2 51.4 0 46.5 0c-5 0-9.5 2.2-12.8 6-1.5 1.8-2.6 3.7-3.6 5.5-.8 1.6-1.6 3.2-2.4 4.7l-.3.6C19.7 31.8 12.2 47 5.3 62l-.1.2c-.7 1.6-1.5 3.3-2.3 5.1-.5 1.1-1 2.3-1.5 3.6C.1 74.6-.3 78.1.2 81.7c1.1 7.5 6.1 13.8 13 16.6 2.6 1.1 5.3 1.6 8.1 1.6.8 0 1.8-.1 2.6-.2 3.3-.4 6.7-1.5 10-3.4 4.1-2.3 8-5.6 12.4-10.4 4.4 4.8 8.4 8.1 12.4 10.4 3.3 1.9 6.7 3 10 3.4.8.1 1.8.2 2.6.2 2.8 0 5.6-.5 8.1-1.6 7-2.8 11.9-9.2 13-16.6.8-3.5.4-7-.9-10.7zm-45.1 5.2C41 69.4 37.5 63 36.3 57.6c-.5-2.3-.6-4.3-.3-6.1.2-1.6.8-3 1.6-4.2 1.9-2.7 5.1-4.4 8.8-4.4 3.7 0 7 1.6 8.8 4.4.8 1.2 1.4 2.6 1.6 4.2.3 1.8.2 3.9-.3 6.1-1.2 5.3-4.7 11.7-10.1 18.6zm39.9 4.7c-.7 5.2-4.2 9.7-9.1 11.7-2.4 1-5 1.3-7.6 
1-2.5-.3-5-1.1-7.6-2.6-3.6-2-7.2-5.1-11.4-9.7 6.6-8.1 10.6-15.5 12.1-22.1.7-3.1.8-5.9.5-8.5-.4-2.5-1.3-4.8-2.7-6.8-3.1-4.5-8.3-7.1-14.1-7.1s-11 2.7-14.1 7.1c-1.4 2-2.3 4.3-2.7 6.8-.4 2.6-.3 5.5.5 8.5 1.5 6.6 5.6 14.1 12.1 22.2-4.1 4.6-7.8 7.7-11.4 9.7-2.6 1.5-5.1 2.3-7.6 2.6-2.7.3-5.3-.1-7.6-1-4.9-2-8.4-6.5-9.1-11.7-.3-2.5-.1-5 .9-7.8.3-1 .8-2 1.3-3.2.7-1.6 1.5-3.3 2.3-5l.1-.2c6.9-14.9 14.3-30.1 22-44.9l.3-.6c.8-1.5 1.6-3.1 2.4-4.6.8-1.6 1.7-3.1 2.8-4.4 2.1-2.4 4.9-3.7 8-3.7 3.1 0 5.9 1.3 8 3.7 1.1 1.3 2 2.8 2.8 4.4.8 1.5 1.6 3.1 2.4 4.6l.3.6c7.6 14.9 15 30.1 21.9 45v.1c.8 1.6 1.5 3.4 2.3 5 .5 1.2 1 2.2 1.3 3.2.8 2.6 1.1 5.1.7 7.7z"></path>
|
||||
</svg>
|
Before Width: | Height: | Size: 2.7 KiB |
Before Width: | Height: | Size: 9.6 KiB |
|
@ -1,3 +0,0 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="90" height="26" viewBox="0 0 1000 285">
|
||||
<path fill="#111" d="M542.1 184.3c0 30.97-38.56 29.14-38.56 29.14H465v-56h38.54c39.7-.23 38.56 26.85 38.56 26.85M465 72.05h29.36c30.53 1.6 29.38 24.33 29.38 24.33 0 28.22-33.74 28.68-33.74 28.68h-25V72.06zm70 64.03s26.38-11.25 26.15-41.1c0 0 4-48.87-60.76-54.84h-71.9V245.1h82.4s68.84.24 68.84-57.83c0 0 1.6-39.47-44.75-51.18M348.95 0h302.6v285.17h-302.6V0zM193.14 184.3c0 30.97-38.56 29.14-38.56 29.14h-38.56v-56h38.56c39.7-.23 38.56 26.85 38.56 26.85M116.02 72.05h29.38c30.52 1.6 29.38 24.33 29.38 24.33 0 28.22-33.74 28.68-33.74 28.68h-25.02V72.06zm70 64.03s26.4-11.25 26.17-41.1c0 0 4-48.87-60.78-54.84h-71.9V245.1h82.4s68.86.24 68.86-57.83c0 0 1.6-39.47-44.76-51.18M0 0h302.6v285.17H0V0zM938.8 54.85v37.87S901.85 70 861 69.54c0 0-76.2-1.5-79.64 73.04 0 0-2.75 68.57 78.72 72.47 0 0 34.2 4.13 80.56-25.48v39.25s-62.2 36.95-134.26 8.5c0 0-60.6-22.15-62.9-94.74 0 0-2.52-74.65 78.27-99.43 0 0 21.58-8.26 60.36-4.6 0 0 23.2 2.3 56.7 16.3M697.93 285.17h302.6V0h-302.6v285.17z" />
|
||||
</svg>
|
Before Width: | Height: | Size: 1.0 KiB |
|
@ -1,6 +0,0 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="180" height="25" viewBox="0 0 915.7 130.1">
|
||||
<path fill="#51a4da" d="M157.8 8.6c-8.1 6.8-15.5 13-23 19.1-.8.7-2.2.7-3.4.7H23.3c-.8 0-1.6-.3-2.6-.4V8.6h137.1z" />
|
||||
<path fill="#b3e4fa" d="M0 33.9c2-.1 4.1-.3 6.1-.3H129c-8.7 7.2-16.2 13.5-23.8 19.6-.8.6-2.3.7-3.4.7H2.4c-.8 0-1.7-.3-2.5-.5.1-6.5.1-13 .1-19.5z" />
|
||||
<path fill="#5bc4bf" d="M36 79.2V59.4h63.2C91 66.3 83.6 72.5 76.1 78.7c-.5.4-1.3.5-1.9.5H36z" />
|
||||
<path fill="#657d8c" d="M613.3 49.4c6.5-3.8 12.5-8.7 19.3-10.9 19.6-6.4 39.7 2.9 48 21.8 6.8 15.4 6.3 31-1.3 46-12.1 24.2-47.3 28-66 8-.1 2-.3 3.6-.3 5.2.1 4.3-2 6.4-6.2 6.3-4.2 0-6.2-2.2-6.2-6.4V8.2c0-4.3 2-6.1 6.3-6.1 4.4 0 6.2 2.1 6.2 6.3.2 13.9.2 27.7.2 41zm-.2 30.1c0 2.6-.1 5.1 0 7.7.5 19.7 19.2 33.3 38.2 27.7 13.9-4.1 22.5-18.7 21-35.8-1.2-13.7-6.6-24.9-20.5-29.7-9.8-3.4-19.3-1.6-27.7 4.4-8.7 6.3-13.1 14.7-11 25.7z M561.7 39h27.7c.3 8.4-2.3 11.2-9.9 11.2-5.3 0-10.6 0-15.9.1-.3 0-.6.2-1.3.3-.2 1-.5 2.1-.5 3.2v45.6c0 1.5.1 3.1.2 4.6 1.2 10.6 8.8 15.1 18.7 10.8 3.5-1.5 7-4.1 9.7.8 1.8 3.2-.7 6.9-5.8 9.4-16.6 8-34.1-1.7-34.8-20-.7-16-.2-32.1-.3-48.2v-6.1c-7.6 0-14.6-.6-21.5.2-7.7.9-13.5 5.7-16.1 12.8-2.4 6.6-3.9 13.8-4.4 20.8-.7 11.4-.1 22.9-.2 34.3-.1 5.5-2.7 7.7-7.6 7-4.5-.6-5.2-3.8-5.2-7.5V72.2 45.5c0-4.3 1.1-7.4 6.2-7.5 5.1-.1 6.4 3 6.4 7.3 0 3.1 0 6.1.4 9.9 9-17.9 25.3-17 41.9-16.2 0-5.1.1-9.5 0-13.9s1.6-6.8 6.2-6.7c4.8 0 6.2 2.7 6.1 6.9-.1 4.3 0 8.6 0 13.7z M267.1 127.1c-36.6 0-61.2-28.5-57.6-66.8 4.8-50.7 52.1-62.9 83-48.9 6.3 2.8 11.5 8.2 16.6 13.1 2.6 2.5 3.4 6.6-.4 9.5-3.6 2.8-6.5 1.2-9.3-1.8-12.7-13.6-28.3-17.6-45.8-12.3-17.9 5.4-27 18.9-30.1 36.7-2.4 13.4-.5 26.3 6.5 38.1 14.1 23.7 48.5 28.7 67.2 9.7 2-2 3.8-4.4 5.3-6.9 1.8-2.9 4.4-4 6.9-2.3 1.9 1.3 4 4.7 3.5 6.4-1.1 3.8-3.2 7.6-5.9 10.5-10.8 11-24.7 15-39.9 15z M704.6 85.7c1.5 13.7 7.5 23.6 20 28 13 4.5 25.6 3.4 36.1-6.9 5.2-5.1 7.6-5.9 10.5-3.1 2.8 2.8 1.9 7-2.4 11.7-.6.6-1.2 1.2-1.9 1.8-15.4 11.7-32.5 13.2-49.8 6-16.7-6.9-24.3-20.9-24.9-38.5-.9-24 11.8-42.2 33-46.7 28.6-6.1 50.8 11.2 50.4 43-.1 4.4-2.9 4.8-6.2 4.8h-58.9c-1.9-.1-3.7-.1-5.9-.1zm1.2-11.4h55.9c-.1-15.3-11-26.2-26-26.3-15.6-.2-29.6 12.1-29.9 26.3zM842.8 73.1c1.4-14-5.1-22.5-18.2-24.3-11.1-1.5-20.7 1.6-28 10.3-2.5 2.9-4.9 5.1-8.6 2.4-3.7-2.8-2.7-6.3-.5-9.5 1.5-2.2 3.3-4.5 5.4-6.1 14.4-10.7 30.2-12.1 46.5-5.1 9.5 4.1 15.2 12.3 15.6 22.5.9 18.9.7 37.9.8 56.9 0 3.9-2.3 5.9-6.3 5.8-4-.1-6-2-5.8-6.1.1-1.8 0-3.7 0-6.5-1.6 1.3-2.4 1.9-3.2 
2.6-12.4 11.4-26.8 13.7-42.4 8.8-9.1-2.9-14.5-9.4-15.5-19.1-1-9.9 2.6-17.8 11.3-22.9 10.8-6.3 22.9-7.7 35-8.7 4.5-.5 9.1-.7 13.9-1zm-.2 9.7c-9.8 1.2-19.4 2.1-28.9 3.6-3.8.6-7.5 2.2-10.9 4-5.9 3.1-8.4 8.4-7.4 14.4 1 6.2 5.3 9.5 11 10.7 17.7 3.9 40.5-6.1 36.2-32.7z M338.5 50.2c.7-1.1 1.3-2.4 2.2-3.3 10.5-10.7 23.3-12.4 36.9-8.2 13.3 4.1 20 14.6 20.9 27.7 1.2 18 .8 36.2.9 54.3 0 4-2.5 5.4-6.1 5.2-3.9-.1-5.8-2.1-5.8-6.2.1-13.7.1-27.3 0-41 0-3.2-.2-6.5-.7-9.7-1.9-11.5-8.4-18.5-18.2-20-12.1-1.8-23.5 3.1-28.1 13.2-2.1 4.7-3.2 10.1-3.4 15.3-.5 13.7-.1 27.3-.2 41 0 6-3.2 8.7-8.8 7.1-1.8-.5-3.2-2.9-4.5-4.6-.5-.7-.2-2-.2-3V9.9c0-6.2 1.2-7.5 6.3-7.6 5.3-.1 7.1 1.4 7.1 6.9.1 11.8 0 23.6 0 35.4 0 1.6.1 3.3.2 4.9.7.2 1.1.5 1.5.7z M469 73.1c1.3-13.6-5.3-22.3-17.9-24.2-11.3-1.7-21 1.4-28.5 10.2-2.5 2.9-5 5.1-8.6 2.4-3.7-2.8-2.7-6.4-.5-9.6 6.2-9.2 15.4-13.3 25.9-14.6 5.2-.7 10.6-.7 15.8.1 16.6 2.7 26.4 14.3 26.5 31.3.2 16.6.1 33.1 0 49.7 0 5.6-1.6 7.5-6 7.5-5 0-6.4-3.1-6.1-7.5.1-1.4 0-2.7 0-4.8-1.3 1-2.3 1.5-3 2.2-12.1 11.4-26.4 13.7-41.8 9.1-9.8-2.9-15.5-9.9-16.2-20.2-.9-10.1 3.4-17.8 12.4-22.7 10.6-5.7 22.3-7.1 34.1-8.1 4.6-.3 9.2-.5 13.9-.8zm0 9.9c-8.8.9-17.4 1.5-25.9 2.9-4.8.8-9.6 2.4-14 4.6-6.3 3.1-8.8 8.6-7.7 14.7.9 5.3 5.2 9.5 11.7 10.7 18.7 3.1 39.3-7.4 35.9-32.9z M63.9 127.4c-5.1-1.2-8.2-3.2-9.7-7.3-1.7-4.6-.3-8.3 3.2-11.5C68 98.9 78.6 89.2 89.1 79.5c24.2-22.1 48.4-44.3 72.7-66.4.5-.5.9-1.2 1.5-1.3 2-.6 4.1-1 6.1-1.5-.6 2.1-.5 4.7-1.8 6.1-31.8 35.3-63.8 70.4-95.8 105.5-2 2.3-5.2 3.7-7.9 5.5z M873.9 49.4h-8.8c-3.2 0-5.1-2-4.4-4.9.5-2 2.3-4.5 4.2-5.3 2.4-.9 5.3-.2 9-.2 0-4.6-.1-8.8 0-12.9.1-5.9 1.7-7.6 6.5-7.7 5.3-.1 6.1 3.3 6.1 7.4v12.9h27.8c-.2 8.1-2.7 10.6-9.7 10.7h-18.3v12.9l.3 35.9c0 1.5 0 3.1.2 4.6.9 12 8.5 16.6 19.5 11.6 3.3-1.5 6.6-3.2 8.8 1.1 2.1 4-.9 6.4-3.8 8.4-14.4 9.7-34.8 1-36.3-16.2-1.3-14.2-.8-28.7-1-43-.1-4.8-.1-9.6-.1-15.3z" />
|
||||
</svg>
|
Before Width: | Height: | Size: 4.2 KiB |
|
@ -1,31 +0,0 @@
|
|||
import { ReactComponent as AirbnbLogo } from './airbnb.svg'
|
||||
import { ReactComponent as UberLogo } from './uber.svg'
|
||||
import { ReactComponent as QuoraLogo } from './quora.svg'
|
||||
import { ReactComponent as RetrieverLogo } from './retriever.svg'
|
||||
import { ReactComponent as StitchfixLogo } from './stitchfix.svg'
|
||||
import { ReactComponent as ChartbeatLogo } from './chartbeat.svg'
|
||||
import { ReactComponent as AllenAILogo } from './allenai.svg'
|
||||
|
||||
import { ReactComponent as RecodeLogo } from './recode.svg'
|
||||
import { ReactComponent as WapoLogo } from './wapo.svg'
|
||||
import { ReactComponent as BBCLogo } from './bbc.svg'
|
||||
import { ReactComponent as MicrosoftLogo } from './microsoft.svg'
|
||||
import { ReactComponent as VenturebeatLogo } from './venturebeat.svg'
|
||||
import { ReactComponent as ThoughtworksLogo } from './thoughtworks.svg'
|
||||
|
||||
export default {
|
||||
airbnb: AirbnbLogo,
|
||||
uber: UberLogo,
|
||||
quora: QuoraLogo,
|
||||
retriever: RetrieverLogo,
|
||||
stitchfix: StitchfixLogo,
|
||||
chartbeat: ChartbeatLogo,
|
||||
allenai: AllenAILogo,
|
||||
|
||||
recode: RecodeLogo,
|
||||
wapo: WapoLogo,
|
||||
bbc: BBCLogo,
|
||||
microsoft: MicrosoftLogo,
|
||||
venturebeat: VenturebeatLogo,
|
||||
thoughtworks: ThoughtworksLogo,
|
||||
}
|
|
@ -1,7 +0,0 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="130" height="28" viewBox="0 0 609 130">
|
||||
<path fill="#737373" d="M213.2 74.3l-3.6 10.2h-.3c-.6-2.3-1.7-5.8-3.5-10L186.5 26h-18.9v77.3h12.5V55.6c0-3 0-6.4-.1-10.6-.1-2.1-.3-3.7-.4-4.9h.3c.6 3 1.3 5.2 1.8 6.6l23.2 56.4h8.8l23-56.9c.5-1.3 1-3.9 1.5-6.1h.3c-.3 5.7-.5 10.8-.6 13.9v49h13.3V25.8H233l-19.8 48.5zm50.6-26.7h13V103h-13zm6.6-23.4c-2.2 0-4 .8-5.5 2.2-1.5 1.4-2.3 3.2-2.3 5.4 0 2.1.8 3.9 2.3 5.3 1.5 1.4 3.3 2.1 5.5 2.1s4.1-.8 5.5-2.1c1.5-1.4 2.3-3.2 2.3-5.3s-.8-3.9-2.3-5.4c-1.3-1.4-3.2-2.2-5.5-2.2m52.5 22.9c-2.4-.5-4.9-.8-7.3-.8-5.9 0-11.3 1.3-15.8 3.9-4.5 2.6-8.1 6.2-10.4 10.7-2.4 4.6-3.6 9.9-3.6 16 0 5.3 1.2 10 3.5 14.3 2.3 4.2 5.5 7.6 9.8 9.9 4.1 2.3 8.9 3.5 14.3 3.5 6.2 0 11.5-1.3 15.7-3.7l.1-.1v-12l-.5.4c-1.9 1.4-4.1 2.6-6.3 3.3-2.3.8-4.4 1.2-6.2 1.2-5.2 0-9.3-1.5-12.2-4.8-3-3.2-4.5-7.6-4.5-13.1 0-5.7 1.5-10.2 4.6-13.5 3.1-3.3 7.2-5 12.2-5 4.2 0 8.5 1.4 12.4 4.2l.5.4V49.2l-.1-.1c-1.7-.7-3.6-1.5-6.2-2m42.9-.4c-3.2 0-6.2 1-8.8 3.1-2.2 1.8-3.7 4.4-5 7.5h-.1v-9.7h-13V103h13V74.7c0-4.8 1-8.8 3.2-11.7 2.2-3 5-4.5 8.4-4.5 1.2 0 2.4.3 3.9.5 1.4.4 2.4.8 3.1 1.3l.5.4v-13l-.3-.1c-.9-.6-2.7-.9-4.9-.9m35.4-.3c-9.1 0-16.4 2.7-21.5 8-5.2 5.3-7.7 12.6-7.7 21.8 0 8.6 2.6 15.6 7.6 20.7 5 5 11.8 7.6 20.3 7.6 8.9 0 16-2.7 21.1-8.1 5.2-5.4 7.7-12.6 7.7-21.5 0-8.8-2.4-15.8-7.3-20.9-4.7-5.1-11.6-7.6-20.2-7.6M411.6 89c-2.4 3.1-6.2 4.6-10.9 4.6s-8.5-1.5-11.2-4.8c-2.7-3.1-4-7.6-4-13.3 0-5.9 1.4-10.4 4-13.6 2.7-3.2 6.4-4.8 11.1-4.8 4.6 0 8.2 1.5 10.8 4.6 2.6 3.1 4 7.6 4 13.5-.2 6-1.3 10.7-3.8 13.8m46.1-18.4c-4.1-1.7-6.7-3-7.9-4.1-1-1-1.5-2.4-1.5-4.2 0-1.5.6-3 2.1-4s3.2-1.5 5.7-1.5c2.2 0 4.5.4 6.7 1s4.2 1.5 5.8 2.7l.5.4V48.7l-.3-.1c-1.5-.6-3.5-1.2-5.9-1.7-2.4-.4-4.6-.6-6.4-.6-6.2 0-11.3 1.5-15.3 4.8-4 3.1-5.9 7.3-5.9 12.2 0 2.6.4 4.9 1.3 6.8.9 1.9 2.2 3.7 4 5.2 1.8 1.4 4.4 3 8 4.5 3 1.3 5.3 2.3 6.7 3.1 1.4.8 2.3 1.7 3 2.4.5.8.8 1.8.8 3.1 0 3.7-2.8 5.5-8.5 5.5-2.2 0-4.5-.4-7.2-1.3s-5.2-2.2-7.3-3.7l-.5-.4v12.7l.3.1c1.9.9 4.2 1.5 7 2.2 2.8.5 5.3.9 7.5.9 6.7 0 12.2-1.5 16.1-4.8 4-3.2 6.1-7.3 6.1-12.6 
0-3.7-1-7-3.2-9.5-2.9-2.4-6.5-4.9-11.7-6.9m49.2-24.2c-9.1 0-16.4 2.7-21.5 8s-7.7 12.6-7.7 21.8c0 8.6 2.6 15.6 7.6 20.7 5 5 11.8 7.6 20.3 7.6 8.9 0 16-2.7 21.1-8.1 5.2-5.4 7.7-12.6 7.7-21.5 0-8.8-2.4-15.8-7.3-20.9-4.7-5.1-11.6-7.6-20.2-7.6M517.2 89c-2.4 3.1-6.2 4.6-10.9 4.6-4.8 0-8.5-1.5-11.2-4.8-2.7-3.1-4-7.6-4-13.3 0-5.9 1.4-10.4 4-13.6 2.7-3.2 6.4-4.8 11.1-4.8 4.5 0 8.2 1.5 10.8 4.6 2.6 3.1 4 7.6 4 13.5 0 6-1.3 10.7-3.8 13.8M603.9 58.3V47.6h-13.1V31.2l-.4.1L578 35l-.3.1v12.5h-19.6v-7c0-3.2.8-5.7 2.2-7.3s3.5-2.4 6.1-2.4c1.8 0 3.7.4 5.8 1.3l.5.3V21.2l-.3-.1c-1.8-.6-4.2-1-7.3-1-3.9 0-7.3.9-10.4 2.4-3.1 1.7-5.4 4-7.1 7.1-1.7 3-2.6 6.4-2.6 10.3v7.7h-9.1v10.6h9.1V103h13.1V58.3h19.6v28.5c0 11.7 5.5 17.6 16.5 17.6 1.8 0 3.7-.3 5.5-.6 1.9-.4 3.3-.9 4.1-1.3l.1-.1V91.7l-.5.4c-.8.5-1.5.9-2.7 1.2-1 .3-1.9.4-2.6.4-2.6 0-4.4-.6-5.7-2.1-1.2-1.4-1.8-3.7-1.8-7.1V58.3h13.3z" />
|
||||
<path fill="#F25022" d="M0 0h61.3v61.3H0z" />
|
||||
<path fill="#7FBA00" d="M67.7 0H129v61.3H67.7z" />
|
||||
<path fill="#00A4EF" d="M0 67.7h61.3V129H0z" />
|
||||
<path fill="#FFB900" d="M67.7 67.7H129V129H67.7z" />
|
||||
</svg>
|
Before Width: | Height: | Size: 3.1 KiB |
|
@ -1,3 +0,0 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="120" height="34" viewBox="0 0 201 56">
|
||||
<path fill="#b92b27" d="M29 43.62c-1.93-3.77-4.18-7.6-8.57-7.6-.84 0-1.68.15-2.45.5l-1.5-2.98c1.83-1.56 4.77-2.8 8.54-2.8 5.87 0 8.88 2.83 11.27 6.44 1.42-3.08 2.1-7.24 2.1-12.4 0-12.88-4.04-19.5-13.45-19.5-9.27 0-13.28 6.62-13.28 19.5 0 12.82 4 19.36 13.28 19.36 1.47 0 2.8-.16 4.04-.52zm2.3 4.5c-2.05.54-4.2.85-6.35.85C12.6 48.96.5 39.1.5 24.76.5 10.32 12.6.48 24.96.48c12.56 0 24.53 9.77 24.53 24.3 0 8.1-3.77 14.67-9.26 18.9 1.78 2.67 3.6 4.43 6.14 4.43 2.77 0 3.9-2.14 4.08-3.82h3.6c.22 2.24-.9 11.53-11 11.53-6.1 0-9.33-3.53-11.76-7.68zm26.12-12.3V19.27c0-1.9-.7-2.73-2.86-2.73h-2.3v-4.4H67.3v23.5c0 3.95 2.15 5.7 5.4 5.7 2.7 0 5.37-1.2 6.8-3.9V19.26c0-1.9-.7-2.73-2.85-2.73h-2.45v-4.4h15.2v24.6c0 2.45.92 3.57 3.72 3.57h.5v4.54L80 47v-4.67h-.28c-2.63 3.2-6.34 5.38-11.62 5.38-5.95 0-10.7-3-10.7-11.87m56 7.48c5.36 0 7.4-4.66 7.5-14.04.1-9.2-2.14-13.63-7.5-13.63-4.68 0-7.62 4.45-7.62 13.63 0 9.38 2.9 14.04 7.62 14.04zm0 4.4c-9.7 0-18.43-7.4-18.43-18.44 0-10.84 8.52-18.04 18.42-18.04 10.32 0 18.6 7.34 18.6 18.04 0 11.04-8.28 18.45-18.6 18.45zm18.9-.7v-4.4h1.47c3.62 0 3.97-1.04 3.97-4.2V19.27c0-1.9-.98-2.72-3.2-2.72h-1.97v-4.4h13.82l.7 7.2h.27c1.53-5.18 5.66-7.9 9.52-7.9 3.2 0 5.7 1.8 5.7 5.5 0 2.55-1.25 5.28-4.7 5.28-3.1 0-3.7-2.1-6.26-2.1-2.3 0-4.06 2.17-4.06 5.36V38.4c0 3.16.77 4.2 4.34 4.2h2.02V47h-21.64m46-5.12c4.4 0 6.2-4.17 6.2-8.36v-5.6c-3.2 3.34-10.68 3.46-10.68 9.4 0 2.9 1.72 4.56 4.47 4.56zm6.42-.02c-1.82 3.5-5.55 5.85-10.76 5.85-6.06 0-9.97-3.2-9.97-8.87 0-11.4 15.87-8.36 20.53-15.9v-.83c0-5.8-2.28-6.7-4.8-6.7-7.06 0-3.84 7.6-10.34 7.6-3.14 0-4.35-1.9-4.35-4.02 0-4.3 5.13-7.76 14.75-7.76 9.1 0 14.7 2.52 14.7 11.58v14.47c0 2.24.82 3.45 2.77 3.45.84 0 1.54-.23 2.08-.6l1.16 2.83c-.94 1.47-3.48 4.06-8.3 4.06-4.2 0-6.83-1.95-7.18-5.14h-.28z"></path>
|
||||
</svg>
|
Before Width: | Height: | Size: 1.8 KiB |
|
@ -1,3 +0,0 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="100" height="24" viewBox="0 0 824 203">
|
||||
<path fill="#EE2C24" d="M84.8 90V49c-18 0-30.7 7.6-38 22.8v-20H4.6v148.1h45v-72.4c0-27.9 5.9-37.4 34.1-37.4h1.1zM823.4 137c0-54.5-31.9-88-78.5-88-19.9 0-39.2 8.2-54 22.9-7.9 7.9-13.8 17.1-17.6 27.2V4.2h-44.5v63.9c-10-13-24.1-19.1-42.4-19.1-19.1 0-36.3 7-50.4 20.8-10.2 9.8-17.3 21.3-21 34.1-3.8-12.5-10.8-24.2-20.8-33.7-14.7-13.9-33-21.3-54.3-21.3-19.9 0-39.3 8.2-54 22.9C378 79.7 372 89 368.2 99.2c-12.8-32.6-38.6-50.2-73-50.2-15.5 0-29.3 4.2-41.5 12.5l-21.2 51.3c-8.1-40-36.9-63.8-76.1-63.8-19.9 0-39.2 8.2-54 22.9C87.9 86.3 80 105.1 80 125.5c0 20.7 7.4 39.2 21.5 54.2 15 15.8 33 23.4 55.3 23.4 16.7 0 31.2-4.1 43.3-12.1l14-34h-33.6c-6.5 5.7-13.6 8.2-22.3 8.2-18.8 0-30-8.2-33.8-23.7h96.2c2.9 14.2 9.5 27.3 19.9 38.2 15 15.8 32.9 23.4 55.2 23.4 33.5 0 60.4-18.5 72.6-49.8 3.6 9.7 9.2 18.5 16.6 26.3 15 15.8 33 23.5 55.4 23.5 22.1 0 40.7-7.9 55.7-22.9 9.1-9.1 15.4-19.5 18.8-31.4 4 14 11.8 26.3 23.3 36.6 13.2 11.8 29 17.7 47.3 17.7 18 0 31.5-4.8 44.7-17.7v14.4h43.1v-47c3.6 9.8 9.2 18.9 16.8 26.9 15 15.8 33 23.4 55.3 23.4 32.7 0 57-15.5 71.1-46.1H769c-6.5 5.7-13.6 8.2-22.3 8.2-18.8 0-30-8.2-33.8-23.7h110.4V137zm-700.3-25.9c3-13.9 15.8-23.2 33.8-23.2 17.2 0 29.4 8.2 34.6 23.2h-68.4zm201.5 36c-7.1 9.5-16.6 14.4-28.3 14.4-20.7 0-34.8-13.9-34.8-36.2 0-20.1 14.1-34.8 33.5-34.8 14.1 0 24.2 5.4 30.2 15.5H366c-1.7 6.3-2.5 12.8-2.5 19.6 0 7.4 1 14.6 2.9 21.5h-41.8zm116.1 16.1c-19.3 0-35.6-16.5-35.6-37 0-20.7 16-37.3 35.6-37.3s35.6 16.5 35.6 37.3c0 20.5-16 37-35.6 37m154.7 0c-22.7 0-39-16-39-37 0-20.2 16.3-37.3 37.3-37.3 20.7 0 36.7 16.3 36.7 37.8.1 20.8-15.9 36.5-35 36.5m116.3-52.1c3-13.9 15.8-23.2 33.8-23.2 17.2 0 29.4 8.2 34.6 23.2h-68.4z"></path>
|
||||
</svg>
|
Before Width: | Height: | Size: 1.7 KiB |
Before Width: | Height: | Size: 6.9 KiB |
|
@ -1,3 +0,0 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="150" height="18" viewBox="0 0 224.6 26.6">
|
||||
<path fill="#2F3237" d="M9.7 11.7l-.3-.1c-4-1.1-5.9-1.9-5.9-5.1 0-2.7 1.6-4.2 4.3-4.2 0 0 5.7 0 7.3 4.7l.1.2H17V0h-1.8l-.7 2.6C12.8 1 10.6 0 8.1 0c-4.7 0-8 3-8 7.3 0 4.8 4 6.2 8.5 7.3 4.5 1.1 6.4 2 6.4 5.3 0 2.9-2.4 4.3-4.5 4.3-3.8 0-6.5-1.5-8.6-4.6l-.1-.2H0v7.2h1.8l.8-3.1c2.2 2.1 4.6 3.1 7.6 3.1 4.8 0 8.2-3.3 8.2-7.7 0-4.9-4-6-8.7-7.2 M22.1 5.5h1.7l.1-.5c.6-2.9.7-2.9 2.7-2.9h4.6v22.3h-2.5v2.1h8.7v-2.1H35V2.1h4.5c2.1 0 2.3 0 2.9 2.9l.1.5h1.7V0H22.1v5.5z M63.2 5.5h1.7L65 5c.6-2.9.7-2.9 2.7-2.9h4.6v22.3h-2.4v2.1h8.7v-2.1h-2.4V2.1h4.5c2.1 0 2.3 0 2.9 2.9l.1.5h1.7V0H63.2v5.5zM49.3 0v2.1h2.5v22.4h-2.5v2.1H58v-2.1h-2.4V2.1H58V0zM132.4 0v2.1h2.5V13h-12.4V2.1h2.4V0h-8.7v2.1h2.6v22.4h-2.5v2.1h8.7v-2.1h-2.5v-9.4h12.4v9.4h-2.5v2.1h8.7v-2.1h-2.4V2.1h2.4V0zM186.4 0v2.1h2.5v22.4h-2.5v2.1h8.7v-2.1h-2.4V2.1h2.4V0zM215.9 0v2.1h2l-5.6 8.1-5.4-8.1h2V0h-8.7v2.1h2.5l7.6 11.3-7.5 11.1h-2.7v2.1h8.7v-2.1H206l5.8-8.7 5.8 8.7h-2.2v2.1h8.7v-2.1h-2.4l-7.8-11.9 7.2-10.5h3.5V0z M161.4 2.1h2.6v22.3h-2.6v2.1h8.7v-2.1h-2.3V15h4c1.5 0 1.6.2 2.1 1.7l.1.2v.2h1.7v-6.6H174v.2c0 .2-.1.3-.2.4-.5 1.2-.7 1.8-2.1 1.8h-4V2.1h9.1c1.7 0 2.1.5 3.1 3v.2h2V0h-20.6l.1 2.1z M108.2 16.1c-.8 5.2-3.6 8.1-7.9 8.1-5.1 0-8.2-4.3-8.2-11.1 0-6.5 3.1-10.7 7.9-10.7 4.6 0 7 2.9 8.1 5.4l.1.2h1.8V0h-2l-.7 2.8C105.4.9 103 0 100.1 0c-6.8 0-12 5.7-12 13.3 0 3.9 1.2 7.2 3.3 9.6 2.1 2.4 5.1 3.6 8.4 3.6 6 0 9.8-3.7 10.6-10.4v-.3h-2.3v.3z"></path>
|
||||
</svg>
|
Before Width: | Height: | Size: 1.5 KiB |
|
@ -1,3 +0,0 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="150" height="28" viewBox="35 20 150 25">
|
||||
<path fill="#001e2b" d="M175.1 28.47a2 2 0 1 1 2 2 2 2 0 0 1-2-2m3.46 0A1.47 1.47 0 1 0 177.1 30a1.45 1.45 0 0 0 1.46-1.53m-.48 1.1h-.52l-.48-.92h-.36v.92h-.46V27.4h1.05c.57 0 .84.16.84.67 0 .4-.2.56-.58.58zm-.77-1.24c.26 0 .42-.05.42-.32 0-.3-.3-.26-.5-.26h-.5v.6zM49.68 24.5h-4.6v14.74h-3.56V24.5h-4.6v-3h12.76v3M54.4 27.8a6.14 6.14 0 0 1 3.8-1.6c1.4 0 3.36.84 3.36 3.98v9.05h-3.38v-7.68c0-1.02.08-2.64-1.38-2.64a3.5 3.5 0 0 0-2.4 1.4v8.94h-3.37V21.5h3.37v6.3M62.95 32.85c0-3.4 1.87-6.65 5.6-6.65s5.62 3.26 5.62 6.65c0 3.38-1.9 6.66-5.6 6.66s-5.62-3.27-5.62-6.65m7.76 0c0-1.6-.32-4.17-2.14-4.17s-2.14 2.58-2.14 4.17.32 4.17 2.14 4.17 2.14-2.57 2.14-4.17M82.8 37.92a6.16 6.16 0 0 1-3.8 1.6c-1.4 0-3.35-.86-3.35-4v-9.05h3.37v7.68c0 1.03-.06 2.65 1.4 2.65a3.5 3.5 0 0 0 2.4-1.4v-8.93h3.37v12.77h-3.4v-1.32M91.7 35.04a.87.87 0 0 0-.96.8c0 1.66 8.85-1.23 8.85 3.76 0 1.9-1.3 3.9-6.35 3.9-4.4 0-5.9-1.55-5.9-3.2a2.2 2.2 0 0 1 1.4-1.95 2.03 2.03 0 0 1-1-1.62 2.9 2.9 0 0 1 1.65-2.55 4.8 4.8 0 0 1-1.6-3.5c0-2.84 2.4-4.48 5.15-4.48a6.13 6.13 0 0 1 3.15.9 3.13 3.13 0 0 1 2.3-1.06 4.97 4.97 0 0 1 .98.1v2.36a3 3 0 0 0-1.07-.27 1.86 1.86 0 0 0-.9.27 5.1 5.1 0 0 1 .68 2.17c0 2.82-2.37 4.46-5.13 4.46l-1.25-.1m-1.07 4.23a.88.88 0 0 0-.53.8c0 1.18 2.4 1.25 3.2 1.25.57 0 3.53-.06 3.53-1.08 0-.66-.45-.6-2.46-.7zm2.32-6.36c1.42 0 1.83-1.16 1.83-2.37 0-1.17-.55-2.15-1.83-2.15-1.4 0-1.84 1.2-1.84 2.4 0 1.16.55 2.12 1.85 2.12M104.2 27.8a6.14 6.14 0 0 1 3.82-1.6c1.4 0 3.35.84 3.35 3.98v9.05H108v-7.68c0-1.02.07-2.64-1.4-2.64a3.5 3.5 0 0 0-2.4 1.4v8.94h-3.36V21.5h3.37v6.3M117.5 26.47h2.63v2.8h-2.62v6.5c0 .62.2 1.25 1.28 1.25a2.4 2.4 0 0 0 1.35-.47v2.57a7.83 7.83 0 0 1-2.17.4c-2.1 0-3.83-1.2-3.83-3.98v-6.27h-1.57v-2.8h1.57v-3.2h3.38v3.2M134.17 39.24h-1.82l-3-14.5h-.06l-3.18 14.5h-1.82l-3.72-17.74h1.78l2.92 14.32h.05l3.2-14.32h1.74l3.04 14.32h.05l3.12-14.32h1.75l-4.05 17.74M137.46 32.94c0-4.7 2.23-6.65 5.03-6.65s5.03 1.93 5.03 6.64-2.24 6.66-5.04 6.66-5.04-1.94-5.04-6.66m8.3 
0c0-2.14-.53-5.15-3.27-5.15s-3.26 3-3.26 5.14.52 5.16 3.25 5.16 3.26-3 3.26-5.16M150.7 29.36h.05c.7-1.5 1.6-2.9 3.48-2.9h.7v1.63c-2.34-.33-3.55 2.18-4.23 4v7.14h-1.78v-12.6h1.78v2.72M158.13 32.5l4.68-5.85h2l-3.48 4.3 4 8.3h-1.92l-3.22-6.85-2.05 2.53v4.3h-1.78V21.5h1.78v11M173.02 29.46a3.26 3.26 0 0 0-2.88-1.67c-1.16 0-2.25.58-2.25 1.86 0 3.08 6.62 1.73 6.62 5.97a3.84 3.84 0 0 1-4.12 3.97 5.17 5.17 0 0 1-4.7-2.64l1.4-.9a3.5 3.5 0 0 0 3.3 2.04 2.2 2.2 0 0 0 2.46-2.14c0-3.17-6.63-1.66-6.63-6.18a3.56 3.56 0 0 1 3.85-3.5 4.83 4.83 0 0 1 4.2 2.2l-1.26.98"></path>
|
||||
</svg>
|
Before Width: | Height: | Size: 2.6 KiB |
|
@ -1,3 +0,0 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="90" height="31" viewBox="0 0 927 322">
|
||||
<path fill="#010202" d="M53.328 229.809c3.917 10.395 9.34 19.283 16.27 26.664 6.93 7.382 15.14 13.031 24.63 16.948 9.491 3.917 19.81 5.875 30.958 5.875 10.847 0 21.015-2.034 30.506-6.102s17.776-9.792 24.856-17.173c7.08-7.382 12.579-16.194 16.496-26.438 3.917-10.244 5.875-21.692 5.875-34.347V0h47.453v316.354h-47.001v-29.376c-10.545 11.147-22.974 19.734-37.285 25.761-14.312 6.025-29.752 9.038-46.323 9.038-16.873 0-32.615-2.938-47.228-8.813-14.612-5.875-27.267-14.235-37.962-25.082S15.441 264.006 9.265 248.79C3.088 233.575 0 216.628 0 197.947V0h47.453v195.236C47.453 207.891 49.411 219.414 53.328 229.809zM332.168 0v115.243c10.545-10.545 22.748-18.905 36.607-25.082s28.924-9.265 45.193-9.265c16.873 0 32.689 3.163 47.453 9.49 14.763 6.327 27.567 14.914 38.414 25.761s19.434 23.651 25.761 38.414c6.327 14.764 9.49 30.431 9.49 47.002 0 16.57-3.163 32.162-9.49 46.774-6.327 14.613-14.914 27.343-25.761 38.188-10.847 10.847-23.651 19.434-38.414 25.761-14.764 6.327-30.581 9.49-47.453 9.49-16.27 0-31.409-3.088-45.419-9.265-14.01-6.176-26.288-14.537-36.833-25.082v28.924h-45.193V0H332.168zM337.365 232.746c4.067 9.642 9.717 18.078 16.948 25.309 7.231 7.231 15.667 12.956 25.308 17.174 9.642 4.218 20.036 6.327 31.184 6.327 10.847 0 21.09-2.109 30.731-6.327s18.001-9.942 25.083-17.174c7.08-7.23 12.729-15.667 16.947-25.309 4.218-9.641 6.327-20.035 6.327-31.183 0-11.148-2.109-21.618-6.327-31.41s-9.867-18.303-16.947-25.534c-7.081-7.23-15.441-12.88-25.083-16.947s-19.885-6.102-30.731-6.102c-10.846 0-21.09 2.034-30.731 6.102s-18.077 9.717-25.309 16.947c-7.23 7.231-12.955 15.742-17.173 25.534-4.218 9.792-6.327 20.262-6.327 31.41C331.264 212.711 333.298 223.105 337.365 232.746zM560.842 155.014c6.025-14.462 14.312-27.191 24.856-38.188s23.049-19.659 37.511-25.986 30.129-9.49 47.001-9.49c16.571 0 31.937 3.013 46.098 9.038 14.16 6.026 26.362 14.387 36.606 25.083 10.244 10.695 18.229 23.35 23.952 37.962 5.725 14.613 8.587 30.506 8.587 47.68v14.914H597.901c1.507 9.34 4.52 18.002 9.039 25.985 4.52 7.984 
10.168 14.914 16.947 20.789 6.779 5.876 14.462 10.471 23.049 13.784 8.587 3.314 17.7 4.972 27.342 4.972 27.418 0 49.563-11.299 66.435-33.896l32.991 24.404c-11.449 15.366-25.609 27.418-42.481 36.155-16.873 8.737-35.854 13.106-56.944 13.106-17.174 0-33.217-3.014-48.131-9.039s-27.869-14.462-38.866-25.309-19.659-23.576-25.986-38.188-9.491-30.506-9.491-47.679C551.803 184.842 554.817 169.476 560.842 155.014zM624.339 137.162c-12.805 10.696-21.316 24.932-25.534 42.708h140.552c-3.917-17.776-12.278-32.012-25.083-42.708-12.805-10.695-27.794-16.043-44.967-16.043C652.133 121.119 637.144 126.467 624.339 137.162zM870.866 142.359c-9.641 10.545-14.462 24.856-14.462 42.934v131.062h-45.646V85.868h45.193v28.472c5.725-9.34 13.182-16.722 22.371-22.145 9.189-5.424 20.111-8.136 32.766-8.136h15.817v42.482h-18.981C892.86 126.542 880.507 131.814 870.866 142.359z"/>
|
||||
</svg>
|
Before Width: | Height: | Size: 2.9 KiB |
|
@ -1,4 +0,0 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" width="150" height="19" viewBox="0 0 1743 222.2">
|
||||
<path d="M208 0v44.4c-3.5 0-6.5.4-9.4-.1-4.1-.8-5.5.9-6.6 4.5-13.9 45-28 89.9-42 134.8-3.2 10.3-6.3 20.7-9.8 30.9-.5 1.4-2.5 3.3-3.8 3.3-22.5.2-45 .1-67.8.1-.5-1.4-1.1-2.7-1.6-4.1-17.4-55-34.8-110.1-52-165.1-1.2-3.7-2.7-5.1-6.7-4.5-2.6.5-5.5-.1-8.3-.2V0h94v44.3H74.9c10.5 41.1 20.9 81.7 31.3 122.3.3 0 .6.1 1 .1 11.2-40.6 22.4-81.3 33.8-122.5h-18.9V0H208z M356 58.3h63.2c.6 7.4 1.2 14.7 1.9 22.2 3.8-4.4 7-8.9 11-12.4 17.9-15.4 38.5-18.4 60.2-10.4 16.4 6.1 23.4 19.6 23.7 36.5.4 24.1.2 48.3.2 72.5v6.6l12.9.6v43.7h-70.8V212v-92.5c0-8.4-2.9-12.7-9.3-14.8-6.7-2.2-13.6 0-18.2 6-1.1 1.4-1.9 3.1-2.7 4.8-.5 1.2-1 2.6-1 3.8-.1 17.9 0 35.8 0 54.2h9.7v44.1H356v-43.9h12.3v-70.8h-12.2c-.1-15.2-.1-29.7-.1-44.6zM741.9 102.4h-10.8v-44c.8-.1 1.6-.3 2.4-.3h66.6v115.6H813v43.9h-65.5v-16.5c-2.9 3.1-4.9 5.4-7.2 7.5-15.9 14.1-43.9 17.9-62.9 8.3-14.5-7.3-21.7-19.7-22.3-35.4-.9-24.3-.7-48.6-1-72.9v-6.3h-12.7v-44H712v5.6c0 29.3-.1 58.6.1 88 0 4.1.7 8.3 2 12.2 2 5.9 7 8.9 13.2 8.7 6.1-.2 10.5-3.1 12.6-8.8.8-2.2 1.7-4.5 1.7-6.7.4-18.1.3-36.3.3-54.9z M345.7 149h-98.5c-.2 9.1.1 17.6 4.5 25.4 3.6 6.5 9.6 8.9 16.8 8.6 7.2-.3 12.9-3.3 15.9-10.1 1.3-3 2.1-6.2 3.3-9.6h54.6c-2.2 17.5-8.9 32.3-22.9 43.3-9.9 7.8-21.4 12-33.8 13.8-16.7 2.5-33.2 1.8-49.4-3.4-21.7-7-36.3-21.4-43-43-7.3-23.3-7.6-47 .1-70.3 9.4-28.7 30.1-44.2 59.5-48.6 13.2-2 26.3-1.1 39.1 2.4 29.9 8.1 45.9 28.7 50.8 58.4 1.8 10.6 2 21.5 3 33.1zm-96.9-30.8H287c.5-8.5-.7-16.1-8.2-20.9-6.8-4.3-14.3-4.7-21.2-.4-7.7 4.9-8.7 12.8-8.8 21.3zM1114 148.9h-98.2c-.2 9-.2 17.6 4.3 25.4 3.8 6.7 9.9 9.1 17.3 8.7 7.4-.4 13.1-3.8 15.9-10.9 1.1-2.8 1.8-5.7 2.8-8.8h54.7c-3.5 32.1-26 53.9-59.4 57.6-15.6 1.7-30.9 1-46-3.7-22.3-7-37.2-21.7-44-44-6.9-23-7.2-46.3.3-69.3 9.5-28.9 30.3-44.5 59.9-48.8 13.9-2 27.6-.9 41 3.1 27.5 8.3 43 27.6 48.1 55.2 2.1 11.4 2.2 23.1 3.3 35.5zm-96.4-30.8h38c.1-16-7.7-24.9-20.6-23.9-11.9.9-19.2 11-17.4 23.9z M535.6 58h18c0-10.6.4-20.9-.1-31.2-.3-5.4 1.5-7.4 6.8-8.5 15.2-3.1 30.2-6.7 46-10.3v50h25.6v44.7h-25c-.2 1.8-.4 3.3-.4 4.7v51.5c0 
1.8.2 3.7.4 5.5 1.3 9.8 8.2 14.9 18 13.3 1.6-.3 3.2-.6 5.6-1v27.7c0 12.9 0 12.9-12.7 14.9-13.6 2.2-27.1 2.9-40.7-.3-19.1-4.6-27.8-15.5-27.9-35.3V103h-13.7c.1-15.3.1-29.8.1-45zM826.2 217.6v-43.9h12.7v-70.9h-12.6V58.3h62.1l1.9 25.3 2-4.4c5.1-12.9 14.4-20.7 28.3-22.2 6.7-.7 13.6-.1 20.3.3 1.2.1 3.4 2 3.4 3.1.2 15.8 0 31.6 0 47.5 0 .3-.3.6-.6 1.1-7.6 0-15.5-1-23.1.2-16.2 2.6-23.8 12-24.5 28.5-.2 5.8-.2 11.7-.3 17.5v18.2h18v44.3c-29.1-.1-58.1-.1-87.6-.1z" />
|
||||
<path fill="#ED1E25" d="M1237 .3c8.5 1.4 17.1 2.2 25.4 4.3 34.3 8.6 51.7 50.6 33.5 80.3-4.4 7.2-10.5 12.4-17.7 16.5-3.2 1.8-6.4 3.5-10.3 5.5 2 .8 3.4 1.6 4.9 2 23.7 6.9 34.2 24.4 35.9 47.6 2.4 31.9-17.7 55.7-49.6 59.6-9.9 1.2-19.9 1.9-29.9 1.9-31.7.2-63.3.1-95 .1h-5.8v-43.8h18.9V44.4H1128V.2c36.3.1 72.7.1 109 .1zm-32.3 128.8c0 14.9-.1 28.5.1 42.2 0 .9 2 2.7 3 2.7 8.3 0 16.7 0 24.9-.7 6.1-.5 11.7-2.8 15.1-8.4 8-13.2.4-31.6-14.7-34.2-9-1.6-18.4-1.1-28.4-1.6zm.2-40.5c8.7-.5 16.9-.2 24.8-1.6 9.6-1.7 16.2-11 16.3-21.2 0-10.2-5.9-19.7-14.7-21.3-8.5-1.5-17.4-1.4-26.4-2v46.1z M1743 103.3c-7.5-.1-15-.4-22.4-.2-1.1 0-3.2 1.9-3.2 3-.2 18.8-.6 37.7.1 56.5.4 12.3 7.9 17.4 20 15.2 1-.2 2-.2 3.2-.3.2 1.2.5 2.3.5 3.4 0 10.8 0 21.7.1 32.5 0 2.4-.3 4.2-3.1 4.7-16.5 2.7-32.9 5.1-49.6 1.2-18.7-4.4-27.7-14.3-28.1-33.4-.5-25.5-.2-51-.3-76.5V103h-6.4c-8.3-.1-7.3.9-7.4-7.6V58.5h18.4c0-10.1-.1-19.8 0-29.4.1-10.6-1.5-8.2 8.7-10.7 14.2-3.4 28.5-6.5 43.5-10v49.9h26v45z M1569.2 119.2c0-5.4.3-10-.1-14.6-.6-8.5-6.1-14.1-13.8-14.3-7.7-.2-14.1 5.5-15.3 13.7-.3 1.8-.3 3.6-.5 5.8h-53.3c-1.9-20.2 8.6-38.7 28.2-47.2 28.5-12.3 57.2-11.2 85.1 2.2 17.1 8.2 25.9 22.7 26.2 41.7.4 20.3.2 40.7.3 61v6.6h12.8v43.8h-66.2c-.5-5.4-1-11-1.6-17.4-1.5 1.7-2.5 2.7-3.4 3.8-17.3 21.3-50.3 21.2-67.2 11.3-13.4-7.9-19.2-20.5-20.1-35.4-2-32.6 15.1-53.7 48.1-58.7 11.6-1.8 23.5-1.6 35.3-2.3 1.6-.1 3.2 0 5.5 0zm.7 28.2c-5.4 0-9.7-.6-13.9.1-12.9 2.1-19.5 11.1-18.1 24.1 1.2 10.7 10.4 16.1 20.3 11.9 5.3-2.2 8.9-6.3 9.7-11.8 1.2-7.9 1.4-16 2-24.3z M1475.6 149.2h-98.5c0 9.7.1 18.9 5.6 27 4.2 6.2 10.6 7.7 17.6 7 6.8-.7 11.9-4.1 14.6-10.5 1.2-2.7 1.8-5.7 2.8-9h54.4c-2.2 17.5-8.9 32.5-23.3 43.3-17 12.8-36.8 15.8-57.3 14.4-8.4-.5-16.9-2-25-4.5-21.4-6.5-36-20.6-42.8-41.9-8-25-8.2-50.2 1.1-74.9 10.3-27.1 31.1-41 59.2-44.8 13.7-1.8 27.3-.7 40.5 3.4 28.2 8.7 43.2 28.8 47.9 57 2.1 10.8 2.3 21.8 3.2 33.5zm-58.1-30.5c.1-9-.9-17.2-9.5-21.8-7.3-3.9-14.9-4-21.6 1.2-6.6 5.1-7.8 12.5-7.3 20.6h38.4z" />
|
||||
</svg>
|
Before Width: | Height: | Size: 4.4 KiB |
Before Width: | Height: | Size: 8.8 KiB |
|
@ -38,7 +38,10 @@ function remarkCodeBlocks(userOptions = {}) {
|
|||
}
|
||||
// Overwrite the code text with the rest of the lines
|
||||
node.value = lines.slice(1).join('\n')
|
||||
} else if (firstLine && /^https:\/\/github.com/.test(firstLine)) {
|
||||
} else if (
|
||||
(firstLine && /^https:\/\/github.com/.test(firstLine)) ||
|
||||
firstLine.startsWith('%%GITHUB_')
|
||||
) {
|
||||
// GitHub URL
|
||||
attrs.github = 'true'
|
||||
}
|
||||
|
|
|
@ -76,7 +76,7 @@
|
|||
.banner
|
||||
background: var(--color-theme)
|
||||
color: var(--color-back)
|
||||
padding: 5rem
|
||||
padding: 1rem 5rem
|
||||
margin-bottom: var(--spacing-md)
|
||||
background-size: cover
|
||||
|
||||
|
@ -143,21 +143,3 @@
|
|||
|
||||
.banner-button-element
|
||||
background: var(--color-theme)
|
||||
|
||||
.logos
|
||||
text-align: center
|
||||
padding-bottom: 1rem
|
||||
|
||||
& + &
|
||||
padding-bottom: 7.5rem
|
||||
|
||||
.logos-content
|
||||
display: flex
|
||||
justify-content: center
|
||||
align-items: center
|
||||
flex-flow: row wrap
|
||||
margin-bottom: 0
|
||||
|
||||
.logo
|
||||
display: inline-block
|
||||
padding: 1.8rem
|
||||
|
|
|
@ -84,7 +84,6 @@
|
|||
--syntax-function: hsl(195, 70%, 54%)
|
||||
--syntax-keyword: hsl(343, 100%, 68%)
|
||||
--syntax-operator: var(--syntax-keyword)
|
||||
--syntax-regex: hsl(45, 90%, 55%)
|
||||
|
||||
// Other
|
||||
--color-inline-code-text: var(--color-dark)
|
||||
|
@ -345,9 +344,6 @@ body [id]:target
|
|||
&.atrule, &.function, &.selector
|
||||
color: var(--syntax-function)
|
||||
|
||||
&.regex, &.important
|
||||
color: var(--syntax-regex)
|
||||
|
||||
&.keyword
|
||||
color: var(--syntax-keyword)
|
||||
|
||||
|
@ -528,11 +524,14 @@ body [id]:target
|
|||
.gatsby-resp-image-figure
|
||||
margin-bottom: var(--spacing-md)
|
||||
|
||||
.gatsby-resp-image-figcaption
|
||||
.gatsby-resp-image-figcaption, .caption
|
||||
font: var(--font-size-xs)/var(--line-height-md) var(--font-primary)
|
||||
color: var(--color-subtle-dark)
|
||||
padding-top: 0.75rem
|
||||
padding-left: 3rem
|
||||
|
||||
code
|
||||
p
|
||||
font: inherit
|
||||
|
||||
code, a
|
||||
color: inherit
|
||||
|
|
|
@ -6,6 +6,9 @@
|
|||
margin-bottom: var(--spacing-md)
|
||||
max-width: 100%
|
||||
|
||||
figure > .root
|
||||
margin-bottom: var(--spacing-sm)
|
||||
|
||||
.fixed
|
||||
table-layout: fixed
|
||||
|
||||
|
@ -54,6 +57,21 @@
|
|||
border-bottom: 2px solid var(--color-theme)
|
||||
vertical-align: bottom
|
||||
|
||||
.th-rotated
|
||||
height: 100px
|
||||
|
||||
.tx
|
||||
transform: translate(15px, 0) rotate(315deg) // 45 = 360 - 45
|
||||
width: 30px
|
||||
white-space: nowrap
|
||||
|
||||
& > span
|
||||
padding-left: 0.5rem
|
||||
|
||||
[data-tooltip]
|
||||
display: inline-block
|
||||
transform: rotate(-316deg)
|
||||
|
||||
.divider
|
||||
height: 0
|
||||
border-bottom: 1px solid var(--color-subtle)
|
||||
|
|
|
@ -19,7 +19,7 @@ import Footer from '../components/footer'
|
|||
import SEO from '../components/seo'
|
||||
import Link from '../components/link'
|
||||
import Section, { Hr } from '../components/section'
|
||||
import { Table, Tr, Th, Td } from '../components/table'
|
||||
import { Table, Tr, Th, Tx, Td } from '../components/table'
|
||||
import { Pre, Code, InlineCode, TypeAnnotation } from '../components/code'
|
||||
import { Ol, Ul, Li } from '../components/list'
|
||||
import { H2, H3, H4, H5, P, Abbr, Help } from '../components/typography'
|
||||
|
@ -64,6 +64,7 @@ const scopeComponents = {
|
|||
Infobox,
|
||||
Table,
|
||||
Tr,
|
||||
Tx,
|
||||
Th,
|
||||
Td,
|
||||
Help,
|
||||
|
|
|
@ -12,16 +12,21 @@ import {
|
|||
LandingDemo,
|
||||
LandingBannerGrid,
|
||||
LandingBanner,
|
||||
LandingLogos,
|
||||
} from '../components/landing'
|
||||
import { H2 } from '../components/typography'
|
||||
import { Ul, Li } from '../components/list'
|
||||
import { InlineCode } from '../components/code'
|
||||
import Button from '../components/button'
|
||||
import Link from '../components/link'
|
||||
|
||||
import QuickstartTraining from './quickstart-training'
|
||||
import Project from './project'
|
||||
import courseImage from '../../docs/images/course.jpg'
|
||||
import prodigyImage from '../../docs/images/prodigy_overview.jpg'
|
||||
import projectsImage from '../../docs/images/projects.png'
|
||||
import irlBackground from '../images/spacy-irl.jpg'
|
||||
|
||||
import BenchmarksChoi from 'usage/_benchmarks-choi.md'
|
||||
import Benchmarks from 'usage/_benchmarks-models.md'
|
||||
|
||||
const CODE_EXAMPLE = `# pip install spacy
|
||||
# python -m spacy download en_core_web_sm
|
||||
|
@ -82,8 +87,7 @@ const Landing = ({ data }) => {
|
|||
<LandingCard title="Get things done" url="/usage/spacy-101" button="Get started">
|
||||
spaCy is designed to help you do real work — to build real products, or gather
|
||||
real insights. The library respects your time, and tries to avoid wasting it.
|
||||
It's easy to install, and its API is simple and productive. We like to think of
|
||||
spaCy as the Ruby on Rails of Natural Language Processing.
|
||||
It's easy to install, and its API is simple and productive.
|
||||
</LandingCard>
|
||||
<LandingCard
|
||||
title="Blazing fast"
|
||||
|
@ -91,16 +95,14 @@ const Landing = ({ data }) => {
|
|||
button="Facts & Figures"
|
||||
>
|
||||
spaCy excels at large-scale information extraction tasks. It's written from the
|
||||
ground up in carefully memory-managed Cython. Independent research in 2015 found
|
||||
spaCy to be the fastest in the world. If your application needs to process
|
||||
entire web dumps, spaCy is the library you want to be using.
|
||||
ground up in carefully memory-managed Cython. If your application needs to
|
||||
process entire web dumps, spaCy is the library you want to be using.
|
||||
</LandingCard>
|
||||
|
||||
<LandingCard title="Deep learning" url="/usage/training" button="Read more">
|
||||
spaCy is the best way to prepare text for deep learning. It interoperates
|
||||
seamlessly with TensorFlow, PyTorch, scikit-learn, Gensim and the rest of
|
||||
Python's awesome AI ecosystem. With spaCy, you can easily construct
|
||||
linguistically sophisticated statistical models for a variety of NLP problems.
|
||||
<LandingCard title="Awesome ecosystem" url="/usage/projects" button="Read more">
|
||||
In the five years since its release, spaCy has become an industry standard with
|
||||
a huge ecosystem. Choose from a variety of plugins, integrate with your machine
|
||||
learning stack and build custom components and workflows.
|
||||
</LandingCard>
|
||||
</LandingGrid>
|
||||
|
||||
|
@ -110,43 +112,154 @@ const Landing = ({ data }) => {
|
|||
<LandingCol>
|
||||
<H2>Features</H2>
|
||||
<Ul>
|
||||
<Li>
|
||||
Non-destructive <strong>tokenization</strong>
|
||||
</Li>
|
||||
<Li>
|
||||
<strong>Named entity</strong> recognition
|
||||
</Li>
|
||||
<Li>
|
||||
Support for <strong>{counts.langs}+ languages</strong>
|
||||
</Li>
|
||||
<Li>
|
||||
<strong>{counts.models} statistical models</strong> for{' '}
|
||||
<strong>{counts.models} trained pipelines</strong> for{' '}
|
||||
{counts.modelLangs} languages
|
||||
</Li>
|
||||
<Li>
|
||||
Multi-task learning with pretrained <strong>transformers</strong> like
|
||||
BERT
|
||||
</Li>
|
||||
<Li>
|
||||
Pretrained <strong>word vectors</strong>
|
||||
</Li>
|
||||
<Li>State-of-the-art speed</Li>
|
||||
<Li>
|
||||
Easy <strong>deep learning</strong> integration
|
||||
Production-ready <strong>training system</strong>
|
||||
</Li>
|
||||
<Li>
|
||||
Linguistically-motivated <strong>tokenization</strong>
|
||||
</Li>
|
||||
<Li>
|
||||
Components for <strong>named entity</strong> recognition,
|
||||
part-of-speech-tagging, dependency parsing, sentence segmentation,{' '}
|
||||
<strong>text classification</strong>, lemmatization, morphological
|
||||
analysis, entity linking and more
|
||||
</Li>
|
||||
<Li>
|
||||
Easily extensible with <strong>custom components</strong> and attributes
|
||||
</Li>
|
||||
<Li>
|
||||
Support for custom models in <strong>PyTorch</strong>,{' '}
|
||||
<strong>TensorFlow</strong> and other frameworks
|
||||
</Li>
|
||||
<Li>Part-of-speech tagging</Li>
|
||||
<Li>Labelled dependency parsing</Li>
|
||||
<Li>Syntax-driven sentence segmentation</Li>
|
||||
<Li>
|
||||
Built in <strong>visualizers</strong> for syntax and NER
|
||||
</Li>
|
||||
<Li>Convenient string-to-hash mapping</Li>
|
||||
<Li>Export to numpy data arrays</Li>
|
||||
<Li>Efficient binary serialization</Li>
|
||||
<Li>
|
||||
Easy <strong>model packaging</strong> and deployment
|
||||
Easy <strong>model packaging</strong>, deployment and workflow
|
||||
management
|
||||
</Li>
|
||||
<Li>Robust, rigorously evaluated accuracy</Li>
|
||||
</Ul>
|
||||
</LandingCol>
|
||||
</LandingGrid>
|
||||
|
||||
<LandingBannerGrid>
|
||||
<LandingBanner
|
||||
label="New in v3.0"
|
||||
title="Transformer-based pipelines, new training system, project templates & more"
|
||||
to="/usage/v3"
|
||||
button="See what's new"
|
||||
small
|
||||
>
|
||||
spaCy v3.0 features all new <strong>transformer-based pipelines</strong> that
|
||||
bring spaCy's accuracy right up to the current <strong>state-of-the-art</strong>
|
||||
. You can use any pretrained transformer to train your own pipelines, and even
|
||||
share one transformer between multiple components with{' '}
|
||||
<strong>multi-task learning</strong>. Training is now fully configurable and
|
||||
extensible, and you can define your own custom models using{' '}
|
||||
<strong>PyTorch</strong>, <strong>TensorFlow</strong> and other frameworks. The
|
||||
new spaCy projects system lets you describe whole{' '}
|
||||
<strong>end-to-end workflows</strong> in a single file, giving you an easy path
|
||||
from prototype to production, and making it easy to clone and adapt
|
||||
best-practice projects for your own use cases.
|
||||
</LandingBanner>
|
||||
|
||||
<LandingBanner
|
||||
title="Prodigy: Radically efficient machine teaching"
|
||||
label="From the makers of spaCy"
|
||||
to="https://prodi.gy"
|
||||
button="Try it out"
|
||||
background="#f6f6f6"
|
||||
color="#000"
|
||||
small
|
||||
>
|
||||
<Link to="https://prodi.gy" hidden>
|
||||
{/** Update image */}
|
||||
<img
|
||||
src={prodigyImage}
|
||||
alt="Prodigy: Radically efficient machine teaching"
|
||||
/>
|
||||
</Link>
|
||||
<br />
|
||||
<br />
|
||||
Prodigy is an <strong>annotation tool</strong> so efficient that data scientists
|
||||
can do the annotation themselves, enabling a new level of rapid iteration.
|
||||
Whether you're working on entity recognition, intent detection or image
|
||||
classification, Prodigy can help you <strong>train and evaluate</strong> your
|
||||
models faster.
|
||||
</LandingBanner>
|
||||
</LandingBannerGrid>
|
||||
|
||||
<LandingGrid cols={2} style={{ gridTemplateColumns: '1fr calc(80ch + 14rem)' }}>
|
||||
<LandingCol>
|
||||
<H2>Reproducible training for custom pipelines</H2>
|
||||
<p>
|
||||
spaCy v3.0 introduces a comprehensive and extensible system for{' '}
|
||||
<strong>configuring your training runs</strong>. Your configuration file
|
||||
will describe every detail of your training run, with no hidden defaults,
|
||||
making it easy to <strong>rerun your experiments</strong> and track changes.
|
||||
You can use the quickstart widget or the{' '}
|
||||
<Link to="/api/cli#init-config">
|
||||
<InlineCode>init config</InlineCode>
|
||||
</Link>{' '}
|
||||
command to get started, or clone a project template for an end-to-end
|
||||
workflow.
|
||||
</p>
|
||||
<p>
|
||||
<Button to="/usage/training">Get started</Button>
|
||||
</p>
|
||||
</LandingCol>
|
||||
<LandingCol>
|
||||
<QuickstartTraining />
|
||||
</LandingCol>
|
||||
</LandingGrid>
|
||||
|
||||
<LandingGrid cols={2}>
|
||||
<LandingCol>
|
||||
<Link to="/usage/projects" hidden>
|
||||
<img src={projectsImage} />
|
||||
</Link>
|
||||
<br />
|
||||
<br />
|
||||
<br />
|
||||
{/** TODO: update with actual example */}
|
||||
<Project id="some_example">
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus interdum
|
||||
sodales lectus.
|
||||
</Project>
|
||||
</LandingCol>
|
||||
<LandingCol>
|
||||
<H2>End-to-end workflows from prototype to production</H2>
|
||||
<p>
|
||||
spaCy's new project system gives you a smooth path from prototype to
|
||||
production. It lets you keep track of all those{' '}
|
||||
<strong>data transformation</strong>, preprocessing and{' '}
|
||||
<strong>training steps</strong>, so you can make sure your project is always
|
||||
ready to hand over for automation. It features source asset download,
|
||||
command execution, checksum verification, and caching with a variety of
|
||||
backends and integrations.
|
||||
</p>
|
||||
<p>
|
||||
<Button to="/usage/projects">Try it out</Button>
|
||||
</p>
|
||||
</LandingCol>
|
||||
</LandingGrid>
|
||||
|
||||
<LandingBannerGrid>
|
||||
<LandingBanner
|
||||
to="https://course.spacy.io"
|
||||
|
@ -169,68 +282,41 @@ const Landing = ({ data }) => {
|
|||
<strong>55 exercises</strong> featuring videos, slide decks, multiple-choice
|
||||
questions and interactive coding practice in the browser.
|
||||
</LandingBanner>
|
||||
|
||||
<LandingBanner
|
||||
title="Prodigy: Radically efficient machine teaching"
|
||||
label="From the makers of spaCy"
|
||||
to="https://prodi.gy"
|
||||
button="Try it out"
|
||||
background="#eee"
|
||||
color="#000"
|
||||
title="spaCy IRL: Two days of NLP"
|
||||
label="Watch the videos"
|
||||
to="https://www.youtube.com/playlist?list=PLBmcuObd5An4UC6jvK_-eSl6jCvP1gwXc"
|
||||
button="Watch the videos"
|
||||
background="#ffc194"
|
||||
backgroundImage={irlBackground}
|
||||
color="#1a1e23"
|
||||
small
|
||||
>
|
||||
Prodigy is an <strong>annotation tool</strong> so efficient that data scientists
|
||||
can do the annotation themselves, enabling a new level of rapid iteration.
|
||||
Whether you're working on entity recognition, intent detection or image
|
||||
classification, Prodigy can help you <strong>train and evaluate</strong> your
|
||||
models faster. Stream in your own examples or real-world data from live APIs,
|
||||
update your model in real-time and chain models together to build more complex
|
||||
systems.
|
||||
We were pleased to invite the spaCy community and other folks working on NLP to
|
||||
Berlin for a small and intimate event. We booked a beautiful venue, hand-picked
|
||||
an awesome lineup of speakers and scheduled plenty of social time to get to know
|
||||
each other. The YouTube playlist includes 12 talks about NLP research,
|
||||
development and applications, with keynotes by Sebastian Ruder (DeepMind) and
|
||||
Yoav Goldberg (Allen AI).
|
||||
</LandingBanner>
|
||||
</LandingBannerGrid>
|
||||
|
||||
<LandingLogos title="spaCy is trusted by" logos={data.logosUsers}>
|
||||
<Button to={`https://github.com/${data.repo}/stargazers`}>and many more</Button>
|
||||
</LandingLogos>
|
||||
<LandingLogos title="Featured on" logos={data.logosPublications} />
|
||||
|
||||
<LandingBanner
|
||||
title="BERT-style language model pretraining"
|
||||
label="New in v2.1"
|
||||
to="/usage/v2-1"
|
||||
button="Read more"
|
||||
>
|
||||
Learn more from small training corpora by initializing your models with{' '}
|
||||
<strong>knowledge from raw text</strong>. The new pretrain command teaches spaCy's
|
||||
CNN model to predict words based on their context, producing representations of
|
||||
words in contexts. If you've seen Google's BERT system or fast.ai's ULMFiT, spaCy's
|
||||
pretraining is similar – but much more efficient. It's still experimental, but users
|
||||
are already reporting good results, so give it a try!
|
||||
</LandingBanner>
|
||||
|
||||
<LandingGrid cols={2}>
|
||||
<LandingGrid cols={2} style={{ gridTemplateColumns: '1fr 60%' }}>
|
||||
<LandingCol>
|
||||
<H2>Benchmarks</H2>
|
||||
<p>
|
||||
In 2015, independent researchers from Emory University and Yahoo! Labs
|
||||
showed that spaCy offered the{' '}
|
||||
<strong>fastest syntactic parser in the world</strong> and that its accuracy
|
||||
was <strong>within 1% of the best</strong> available (
|
||||
<Link to="https://aclweb.org/anthology/P/P15/P15-1038.pdf">
|
||||
Choi et al., 2015
|
||||
</Link>
|
||||
). spaCy v2.0, released in 2017, is more accurate than any of the systems
|
||||
Choi et al. evaluated.
|
||||
spaCy v3.0 introduces transformer-based pipelines that bring spaCy's
|
||||
accuracy right up to the current <strong>state-of-the-art</strong>. You can
|
||||
also use a CPU-optimized pipeline, which is less accurate but much cheaper
|
||||
to run.
|
||||
</p>
|
||||
<p>
|
||||
<Button to="/usage/facts-figures#benchmarks" large>
|
||||
See details
|
||||
</Button>
|
||||
<Button to="/usage/facts-figures#benchmarks">See details</Button>
|
||||
</p>
|
||||
</LandingCol>
|
||||
|
||||
<LandingCol>
|
||||
<BenchmarksChoi />
|
||||
<Benchmarks />
|
||||
</LandingCol>
|
||||
</LandingGrid>
|
||||
</>
|
||||
|
@ -245,18 +331,6 @@ Landing.propTypes = {
|
|||
models: PropTypes.arrayOf(PropTypes.string),
|
||||
})
|
||||
),
|
||||
logosUsers: PropTypes.arrayOf(
|
||||
PropTypes.shape({
|
||||
id: PropTypes.string.isRequired,
|
||||
url: PropTypes.string.isRequired,
|
||||
})
|
||||
),
|
||||
logosPublications: PropTypes.arrayOf(
|
||||
PropTypes.shape({
|
||||
id: PropTypes.string.isRequired,
|
||||
url: PropTypes.string.isRequired,
|
||||
})
|
||||
),
|
||||
}),
|
||||
}
|
||||
|
||||
|
@ -274,14 +348,6 @@ const landingQuery = graphql`
|
|||
models
|
||||
starters
|
||||
}
|
||||
logosUsers {
|
||||
id
|
||||
url
|
||||
}
|
||||
logosPublications {
|
||||
id
|
||||
url
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,7 +58,8 @@ const QuickstartInstall = ({ id, title }) => (
|
|||
<StaticQuery
|
||||
query={query}
|
||||
render={({ site }) => {
|
||||
const models = site.siteMetadata.languages.filter(({ models }) => models !== null)
|
||||
const { nightly, languages } = site.siteMetadata
|
||||
const models = languages.filter(({ models }) => models !== null)
|
||||
const data = [
|
||||
...DATA,
|
||||
{
|
||||
|
@ -82,7 +83,10 @@ const QuickstartInstall = ({ id, title }) => (
|
|||
</QS>
|
||||
<QS package="pip">pip install -U spacy</QS>
|
||||
<QS package="conda">conda install -c conda-forge spacy</QS>
|
||||
<QS package="source">git clone https://github.com/{repo}</QS>
|
||||
<QS package="source">
|
||||
git clone https://github.com/{repo}
|
||||
{nightly ? ` --branch develop` : ''}
|
||||
</QS>
|
||||
<QS package="source">cd spaCy</QS>
|
||||
<QS package="source" os="linux">
|
||||
export PYTHONPATH=`pwd`
|
||||
|
@ -127,6 +131,7 @@ const query = graphql`
|
|||
query QuickstartInstallQuery {
|
||||
site {
|
||||
siteMetadata {
|
||||
nightly
|
||||
languages {
|
||||
code
|
||||
name
|
||||
|
|
|
@ -47,7 +47,7 @@ const DATA = [
|
|||
},
|
||||
]
|
||||
|
||||
export default function QuickstartTraining({ id, title, download = 'config.cfg' }) {
|
||||
export default function QuickstartTraining({ id, title, download = 'base_config.cfg' }) {
|
||||
const [lang, setLang] = useState(DEFAULT_LANG)
|
||||
const [components, setComponents] = useState([])
|
||||
const [[hardware], setHardware] = useState([DEFAULT_HARDWARE])
|
||||
|
|