Merge branch 'master' into docs/llm

This commit is contained in:
svlandeg 2023-08-31 11:54:30 +02:00
commit 40ae30dc5a
10 changed files with 53 additions and 19 deletions

View File

@ -1,6 +1,6 @@
# fmt: off
__title__ = "spacy"
__version__ = "3.6.0"
__version__ = "3.6.1"
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
__projects__ = "https://github.com/explosion/projects"

View File

@ -15,4 +15,7 @@ sentences = [
"Türkiye'nin başkenti neresi?",
"Bakanlar Kurulu 180 günlük eylem planını açıkladı.",
"Merkez Bankası, beklentiler doğrultusunda faizlerde değişikliğe gitmedi.",
"Cemal Sureya kimdir?",
"Bunlari Biliyor muydunuz?",
"Altinoluk Turkiye haritasinin neresinde yer alir?",
]

View File

@ -67,8 +67,8 @@ def build_hash_embed_cnn_tok2vec(
are between 2 and 8.
window_size (int): The number of tokens on either side to concatenate during
the convolutions. The receptive field of the CNN will be
depth * (window_size * 2 + 1), so a 4-layer network with window_size of
2 will be sensitive to 20 words at a time. Recommended value is 1.
depth * window_size * 2 + 1, so a 4-layer network with window_size of
2 will be sensitive to 17 words at a time. Recommended value is 1.
embed_size (int): The number of rows in the hash embedding tables. This can
be surprisingly small, due to the use of the hash embeddings. Recommended
values are between 2000 and 10000.

View File

@ -83,7 +83,7 @@ consisting of a CNN and a layer-normalized maxout activation function.
| `width` | The width of the input and output. These are required to be the same, so that residual connections can be used. Recommended values are `96`, `128` or `300`. ~~int~~ |
| `depth` | The number of convolutional layers to use. Recommended values are between `2` and `8`. ~~int~~ |
| `embed_size` | The number of rows in the hash embedding tables. This can be surprisingly small, due to the use of the hash embeddings. Recommended values are between `2000` and `10000`. ~~int~~ |
| `window_size` | The number of tokens on either side to concatenate during the convolutions. The receptive field of the CNN will be `depth * (window_size * 2 + 1)`, so a 4-layer network with a window size of `2` will be sensitive to 20 words at a time. Recommended value is `1`. ~~int~~ |
| `window_size` | The number of tokens on either side to concatenate during the convolutions. The receptive field of the CNN will be `depth * window_size * 2 + 1`, so a 4-layer network with a window size of `2` will be sensitive to 17 words at a time. Recommended value is `1`. ~~int~~ |
| `maxout_pieces` | The number of pieces to use in the maxout non-linearity. If `1`, the [`Mish`](https://thinc.ai/docs/api-layers#mish) non-linearity is used instead. Recommended values are `1`-`3`. ~~int~~ |
| `subword_features` | Whether to also embed subword features, specifically the prefix, suffix and word shape. This is recommended for alphabetic languages like English, but not if single-character tokens are used for a language such as Chinese. ~~bool~~ |
| `pretrained_vectors` | Whether to also use static vectors. ~~bool~~ |

View File

@ -893,7 +893,7 @@ OpenAI's `davinci` model family.
>
> ```ini
> [components.llm.model]
> @llm_models = "spacy.Davinci.v1 "
> @llm_models = "spacy.Davinci.v1"
> name = "davinci"
> config = {"temperature": 0.3}
> ```
@ -914,7 +914,7 @@ OpenAI's `curie` model family.
>
> ```ini
> [components.llm.model]
> @llm_models = "spacy.Curie.v1 "
> @llm_models = "spacy.Curie.v1"
> name = "curie"
> config = {"temperature": 0.3}
> ```
@ -935,7 +935,7 @@ OpenAI's `babbage` model family.
>
> ```ini
> [components.llm.model]
> @llm_models = "spacy.Babbage.v1 "
> @llm_models = "spacy.Babbage.v1"
> name = "babbage"
> config = {"temperature": 0.3}
> ```
@ -956,7 +956,7 @@ OpenAI's `ada` model family.
>
> ```ini
> [components.llm.model]
> @llm_models = "spacy.Ada.v1 "
> @llm_models = "spacy.Ada.v1"
> name = "ada"
> config = {"temperature": 0.3}
> ```
@ -977,7 +977,7 @@ Cohere's `command` model family.
>
> ```ini
> [components.llm.model]
> @llm_models = "spacy.Command.v1 "
> @llm_models = "spacy.Command.v1"
> name = "command"
> config = {"temperature": 0.3}
> ```
@ -998,7 +998,7 @@ Anthropic's `claude-2` model family.
>
> ```ini
> [components.llm.model]
> @llm_models = "spacy.Claude-2.v1 "
> @llm_models = "spacy.Claude-2.v1"
> name = "claude-2"
> config = {"temperature": 0.3}
> ```
@ -1019,7 +1019,7 @@ Anthropic's `claude-1` model family.
>
> ```ini
> [components.llm.model]
> @llm_models = "spacy.Claude-1.v1 "
> @llm_models = "spacy.Claude-1.v1"
> name = "claude-1"
> config = {"temperature": 0.3}
> ```
@ -1040,7 +1040,7 @@ Anthropic's `claude-instant-1` model family.
>
> ```ini
> [components.llm.model]
> @llm_models = "spacy.Claude-instant-1.v1 "
> @llm_models = "spacy.Claude-instant-1.v1"
> name = "claude-instant-1"
> config = {"temperature": 0.3}
> ```
@ -1061,7 +1061,7 @@ Anthropic's `claude-instant-1.1` model family.
>
> ```ini
> [components.llm.model]
> @llm_models = "spacy.Claude-instant-1-1.v1 "
> @llm_models = "spacy.Claude-instant-1-1.v1"
> name = "claude-instant-1.1"
> config = {"temperature": 0.3}
> ```
@ -1082,7 +1082,7 @@ Anthropic's `claude-1.0` model family.
>
> ```ini
> [components.llm.model]
> @llm_models = "spacy.Claude-1-0.v1 "
> @llm_models = "spacy.Claude-1-0.v1"
> name = "claude-1.0"
> config = {"temperature": 0.3}
> ```
@ -1124,7 +1124,7 @@ Anthropic's `claude-1.3` model family.
>
> ```ini
> [components.llm.model]
> @llm_models = "spacy.Claude-1-3.v1 "
> @llm_models = "spacy.Claude-1-3.v1"
> name = "claude-1.3"
> config = {"temperature": 0.3}
> ```

View File

@ -521,7 +521,7 @@ has two columns, indicating the start and end position.
| Name | Description |
| ----------- | ---------------------------------------------------------------------------- |
| `min_size` | The minimal phrase lengths to suggest (inclusive). ~~[int]~~ |
| `max_size` | The maximal phrase lengths to suggest (exclusive). ~~[int]~~ |
| `max_size` | The maximal phrase lengths to suggest (inclusive). ~~[int]~~ |
| **CREATES** | The suggester function. ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ |
### spacy.preset_spans_suggester.v1 {id="preset_spans_suggester"}

View File

@ -68,7 +68,7 @@ weights, and returns it.
cls = spacy.util.get_lang_class(lang) # 1. Get Language class, e.g. English
nlp = cls() # 2. Initialize it
for name in pipeline:
nlp.add_pipe(name) # 3. Add the component to the pipeline
nlp.add_pipe(name, config={...}) # 3. Add the component to the pipeline
nlp.from_disk(data_path) # 4. Load in the binary data
```

View File

@ -244,7 +244,7 @@ tagging pipeline. This is also why the pipeline state is always held by the
together and returns an instance of `Language` with a pipeline set and access to
the binary data:
```python {title="spacy.load under the hood"}
```python {title="spacy.load under the hood (abstract example)"}
lang = "en"
pipeline = ["tok2vec", "tagger", "parser", "ner", "attribute_ruler", "lemmatizer"]
data_path = "path/to/en_core_web_sm/en_core_web_sm-3.0.0"
@ -252,7 +252,7 @@ data_path = "path/to/en_core_web_sm/en_core_web_sm-3.0.0"
cls = spacy.util.get_lang_class(lang) # 1. Get Language class, e.g. English
nlp = cls() # 2. Initialize it
for name in pipeline:
nlp.add_pipe(name) # 3. Add the component to the pipeline
nlp.add_pipe(name, config={...}) # 3. Add the component to the pipeline
nlp.from_disk(data_path) # 4. Load in the binary data
```

View File

@ -4444,6 +4444,31 @@
},
"category": ["pipeline", "standalone", "scientific"],
"tags": ["ner"]
},
{
"id": "hobbit-spacy",
"title": "Hobbit spaCy",
"slogan": "NLP for Middle Earth",
"description": "Hobbit spaCy is a custom spaCy pipeline designed specifically for working with Middle Earth and texts from the world of J.R.R. Tolkien.",
"github": "wjbmattingly/hobbit-spacy",
"pip": "en-hobbit",
"code_example": [
"import spacy",
"",
"nlp = spacy.load('en_hobbit')",
"doc = nlp('Frodo saw Glorfindel and Glóin; and in a corner alone Strider was sitting, clad in his old travel - worn clothes again')"
],
"code_language": "python",
"thumb": "https://github.com/wjbmattingly/hobbit-spacy/blob/main/images/hobbit-thumbnail.png?raw=true",
"image": "https://github.com/wjbmattingly/hobbit-spacy/raw/main/images/hobbitspacy.png",
"author": "W.J.B. Mattingly",
"author_links": {
"twitter": "wjb_mattingly",
"github": "wjbmattingly",
"website": "https://wjbmattingly.com"
},
"category": ["pipeline", "standalone"],
"tags": ["spans", "rules", "ner"]
}
],

View File

@ -16,3 +16,9 @@ NETLIFY_NEXT_PLUGIN_SKIP = "true"
[[plugins]]
package = "@netlify/plugin-nextjs"
[[headers]]
for = "/*"
[headers.values]
X-Frame-Options = "DENY"
X-XSS-Protection = "1; mode=block"