mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-20 09:01:58 +03:00
Merge branch 'master' into docs/llm
This commit is contained in:
commit
40ae30dc5a
|
@ -1,6 +1,6 @@
|
|||
# fmt: off
|
||||
__title__ = "spacy"
|
||||
__version__ = "3.6.0"
|
||||
__version__ = "3.6.1"
|
||||
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
|
||||
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
|
||||
__projects__ = "https://github.com/explosion/projects"
|
||||
|
|
|
@ -15,4 +15,7 @@ sentences = [
|
|||
"Türkiye'nin başkenti neresi?",
|
||||
"Bakanlar Kurulu 180 günlük eylem planını açıkladı.",
|
||||
"Merkez Bankası, beklentiler doğrultusunda faizlerde değişikliğe gitmedi.",
|
||||
"Cemal Sureya kimdir?",
|
||||
"Bunlari Biliyor muydunuz?",
|
||||
"Altinoluk Turkiye haritasinin neresinde yer alir?",
|
||||
]
|
||||
|
|
|
@ -67,8 +67,8 @@ def build_hash_embed_cnn_tok2vec(
|
|||
are between 2 and 8.
|
||||
window_size (int): The number of tokens on either side to concatenate during
|
||||
the convolutions. The receptive field of the CNN will be
|
||||
depth * (window_size * 2 + 1), so a 4-layer network with window_size of
|
||||
2 will be sensitive to 20 words at a time. Recommended value is 1.
|
||||
depth * window_size * 2 + 1, so a 4-layer network with window_size of
|
||||
2 will be sensitive to 17 words at a time. Recommended value is 1.
|
||||
embed_size (int): The number of rows in the hash embedding tables. This can
|
||||
be surprisingly small, due to the use of the hash embeddings. Recommended
|
||||
values are between 2000 and 10000.
|
||||
|
|
|
@ -83,7 +83,7 @@ consisting of a CNN and a layer-normalized maxout activation function.
|
|||
| `width` | The width of the input and output. These are required to be the same, so that residual connections can be used. Recommended values are `96`, `128` or `300`. ~~int~~ |
|
||||
| `depth` | The number of convolutional layers to use. Recommended values are between `2` and `8`. ~~int~~ |
|
||||
| `embed_size` | The number of rows in the hash embedding tables. This can be surprisingly small, due to the use of the hash embeddings. Recommended values are between `2000` and `10000`. ~~int~~ |
|
||||
| `window_size` | The number of tokens on either side to concatenate during the convolutions. The receptive field of the CNN will be `depth * (window_size * 2 + 1)`, so a 4-layer network with a window size of `2` will be sensitive to 20 words at a time. Recommended value is `1`. ~~int~~ |
|
||||
| `window_size` | The number of tokens on either side to concatenate during the convolutions. The receptive field of the CNN will be `depth * window_size * 2 + 1`, so a 4-layer network with a window size of `2` will be sensitive to 17 words at a time. Recommended value is `1`. ~~int~~ |
|
||||
| `maxout_pieces` | The number of pieces to use in the maxout non-linearity. If `1`, the [`Mish`](https://thinc.ai/docs/api-layers#mish) non-linearity is used instead. Recommended values are `1`-`3`. ~~int~~ |
|
||||
| `subword_features` | Whether to also embed subword features, specifically the prefix, suffix and word shape. This is recommended for alphabetic languages like English, but not if single-character tokens are used for a language such as Chinese. ~~bool~~ |
|
||||
| `pretrained_vectors` | Whether to also use static vectors. ~~bool~~ |
|
||||
|
|
|
@ -893,7 +893,7 @@ OpenAI's `davinci` model family.
|
|||
>
|
||||
> ```ini
|
||||
> [components.llm.model]
|
||||
> @llm_models = "spacy.Davinci.v1 "
|
||||
> @llm_models = "spacy.Davinci.v1"
|
||||
> name = "davinci"
|
||||
> config = {"temperature": 0.3}
|
||||
> ```
|
||||
|
@ -914,7 +914,7 @@ OpenAI's `curie` model family.
|
|||
>
|
||||
> ```ini
|
||||
> [components.llm.model]
|
||||
> @llm_models = "spacy.Curie.v1 "
|
||||
> @llm_models = "spacy.Curie.v1"
|
||||
> name = "curie"
|
||||
> config = {"temperature": 0.3}
|
||||
> ```
|
||||
|
@ -935,7 +935,7 @@ OpenAI's `babbage` model family.
|
|||
>
|
||||
> ```ini
|
||||
> [components.llm.model]
|
||||
> @llm_models = "spacy.Babbage.v1 "
|
||||
> @llm_models = "spacy.Babbage.v1"
|
||||
> name = "babbage"
|
||||
> config = {"temperature": 0.3}
|
||||
> ```
|
||||
|
@ -956,7 +956,7 @@ OpenAI's `ada` model family.
|
|||
>
|
||||
> ```ini
|
||||
> [components.llm.model]
|
||||
> @llm_models = "spacy.Ada.v1 "
|
||||
> @llm_models = "spacy.Ada.v1"
|
||||
> name = "ada"
|
||||
> config = {"temperature": 0.3}
|
||||
> ```
|
||||
|
@ -977,7 +977,7 @@ Cohere's `command` model family.
|
|||
>
|
||||
> ```ini
|
||||
> [components.llm.model]
|
||||
> @llm_models = "spacy.Command.v1 "
|
||||
> @llm_models = "spacy.Command.v1"
|
||||
> name = "command"
|
||||
> config = {"temperature": 0.3}
|
||||
> ```
|
||||
|
@ -998,7 +998,7 @@ Anthropic's `claude-2` model family.
|
|||
>
|
||||
> ```ini
|
||||
> [components.llm.model]
|
||||
> @llm_models = "spacy.Claude-2.v1 "
|
||||
> @llm_models = "spacy.Claude-2.v1"
|
||||
> name = "claude-2"
|
||||
> config = {"temperature": 0.3}
|
||||
> ```
|
||||
|
@ -1019,7 +1019,7 @@ Anthropic's `claude-1` model family.
|
|||
>
|
||||
> ```ini
|
||||
> [components.llm.model]
|
||||
> @llm_models = "spacy.Claude-1.v1 "
|
||||
> @llm_models = "spacy.Claude-1.v1"
|
||||
> name = "claude-1"
|
||||
> config = {"temperature": 0.3}
|
||||
> ```
|
||||
|
@ -1040,7 +1040,7 @@ Anthropic's `claude-instant-1` model family.
|
|||
>
|
||||
> ```ini
|
||||
> [components.llm.model]
|
||||
> @llm_models = "spacy.Claude-instant-1.v1 "
|
||||
> @llm_models = "spacy.Claude-instant-1.v1"
|
||||
> name = "claude-instant-1"
|
||||
> config = {"temperature": 0.3}
|
||||
> ```
|
||||
|
@ -1061,7 +1061,7 @@ Anthropic's `claude-instant-1.1` model family.
|
|||
>
|
||||
> ```ini
|
||||
> [components.llm.model]
|
||||
> @llm_models = "spacy.Claude-instant-1-1.v1 "
|
||||
> @llm_models = "spacy.Claude-instant-1-1.v1"
|
||||
> name = "claude-instant-1.1"
|
||||
> config = {"temperature": 0.3}
|
||||
> ```
|
||||
|
@ -1082,7 +1082,7 @@ Anthropic's `claude-1.0` model family.
|
|||
>
|
||||
> ```ini
|
||||
> [components.llm.model]
|
||||
> @llm_models = "spacy.Claude-1-0.v1 "
|
||||
> @llm_models = "spacy.Claude-1-0.v1"
|
||||
> name = "claude-1.0"
|
||||
> config = {"temperature": 0.3}
|
||||
> ```
|
||||
|
@ -1124,7 +1124,7 @@ Anthropic's `claude-1.3` model family.
|
|||
>
|
||||
> ```ini
|
||||
> [components.llm.model]
|
||||
> @llm_models = "spacy.Claude-1-3.v1 "
|
||||
> @llm_models = "spacy.Claude-1-3.v1"
|
||||
> name = "claude-1.3"
|
||||
> config = {"temperature": 0.3}
|
||||
> ```
|
||||
|
|
|
@ -521,7 +521,7 @@ has two columns, indicating the start and end position.
|
|||
| Name | Description |
|
||||
| ----------- | ---------------------------------------------------------------------------- |
|
||||
| `min_size` | The minimal phrase lengths to suggest (inclusive). ~~[int]~~ |
|
||||
| `max_size` | The maximal phrase lengths to suggest (exclusive). ~~[int]~~ |
|
||||
| `max_size` | The maximal phrase lengths to suggest (inclusive). ~~[int]~~ |
|
||||
| **CREATES** | The suggester function. ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ |
|
||||
|
||||
### spacy.preset_spans_suggester.v1 {id="preset_spans_suggester"}
|
||||
|
|
|
@ -68,7 +68,7 @@ weights, and returns it.
|
|||
cls = spacy.util.get_lang_class(lang) # 1. Get Language class, e.g. English
|
||||
nlp = cls() # 2. Initialize it
|
||||
for name in pipeline:
|
||||
nlp.add_pipe(name) # 3. Add the component to the pipeline
|
||||
nlp.add_pipe(name, config={...}) # 3. Add the component to the pipeline
|
||||
nlp.from_disk(data_path) # 4. Load in the binary data
|
||||
```
|
||||
|
||||
|
|
|
@ -244,7 +244,7 @@ tagging pipeline. This is also why the pipeline state is always held by the
|
|||
together and returns an instance of `Language` with a pipeline set and access to
|
||||
the binary data:
|
||||
|
||||
```python {title="spacy.load under the hood"}
|
||||
```python {title="spacy.load under the hood (abstract example)"}
|
||||
lang = "en"
|
||||
pipeline = ["tok2vec", "tagger", "parser", "ner", "attribute_ruler", "lemmatizer"]
|
||||
data_path = "path/to/en_core_web_sm/en_core_web_sm-3.0.0"
|
||||
|
@ -252,7 +252,7 @@ data_path = "path/to/en_core_web_sm/en_core_web_sm-3.0.0"
|
|||
cls = spacy.util.get_lang_class(lang) # 1. Get Language class, e.g. English
|
||||
nlp = cls() # 2. Initialize it
|
||||
for name in pipeline:
|
||||
nlp.add_pipe(name) # 3. Add the component to the pipeline
|
||||
nlp.add_pipe(name, config={...}) # 3. Add the component to the pipeline
|
||||
nlp.from_disk(data_path) # 4. Load in the binary data
|
||||
```
|
||||
|
||||
|
|
|
@ -4444,6 +4444,31 @@
|
|||
},
|
||||
"category": ["pipeline", "standalone", "scientific"],
|
||||
"tags": ["ner"]
|
||||
},
|
||||
{
|
||||
"id": "hobbit-spacy",
|
||||
"title": "Hobbit spaCy",
|
||||
"slogan": "NLP for Middle Earth",
|
||||
"description": "Hobbit spaCy is a custom spaCy pipeline designed specifically for working with Middle Earth and texts from the world of J.R.R. Tolkien.",
|
||||
"github": "wjbmattingly/hobbit-spacy",
|
||||
"pip": "en-hobbit",
|
||||
"code_example": [
|
||||
"import spacy",
|
||||
"",
|
||||
"nlp = spacy.load('en_hobbit')",
|
||||
"doc = nlp('Frodo saw Glorfindel and Glóin; and in a corner alone Strider was sitting, clad in his old travel - worn clothes again')"
|
||||
],
|
||||
"code_language": "python",
|
||||
"thumb": "https://github.com/wjbmattingly/hobbit-spacy/blob/main/images/hobbit-thumbnail.png?raw=true",
|
||||
"image": "https://github.com/wjbmattingly/hobbit-spacy/raw/main/images/hobbitspacy.png",
|
||||
"author": "W.J.B. Mattingly",
|
||||
"author_links": {
|
||||
"twitter": "wjb_mattingly",
|
||||
"github": "wjbmattingly",
|
||||
"website": "https://wjbmattingly.com"
|
||||
},
|
||||
"category": ["pipeline", "standalone"],
|
||||
"tags": ["spans", "rules", "ner"]
|
||||
}
|
||||
],
|
||||
|
||||
|
|
|
@ -16,3 +16,9 @@ NETLIFY_NEXT_PLUGIN_SKIP = "true"
|
|||
|
||||
[[plugins]]
|
||||
package = "@netlify/plugin-nextjs"
|
||||
|
||||
[[headers]]
|
||||
for = "/*"
|
||||
[headers.values]
|
||||
X-Frame-Options = "DENY"
|
||||
X-XSS-Protection = "1; mode=block"
|
||||
|
|
Loading…
Reference in New Issue
Block a user