add NER.v3 example to README
This commit is contained in: parent a0a195688f, commit ffd18613d1
@@ -119,9 +119,13 @@ from spacy_llm.util import assemble
```python
nlp = assemble("config.cfg")
doc = nlp("You look gorgeous!")
print(doc.cats)
# {"COMPLIMENT": 1.0, "INSULT": 0.0}
```

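`doc.cats` is a plain dictionary mapping each label to its score, so the predicted category can be read off directly. A minimal sketch, reusing the `doc` from the snippet above:

```python
# doc.cats maps labels to scores, e.g. {"COMPLIMENT": 1.0, "INSULT": 0.0}
predicted = max(doc.cats, key=doc.cats.get)
print(predicted)  # "COMPLIMENT"
```
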
### Example 2: Add a text classifier using an open-source model through Hugging Face {id="example-2"}

The same TextCat task can also be used with an open-source model from Hugging Face.

To run this example, ensure that you have a GPU enabled and `transformers`,
`torch` and CUDA installed. For more background information, see the
@@ -129,7 +133,7 @@ To run this example, ensure that you have a GPU enabled, and `transformers`,

Create a config file `config.cfg` containing at least the following (or see the
full example
[here](https://github.com/explosion/spacy-llm/tree/main/usage_examples/textcat_dolly)):

```ini
[nlp]
@@ -142,9 +146,10 @@ pipeline = ["llm"]
factory = "llm"

[components.llm.task]
@llm_tasks = "spacy.TextCat.v2"
labels = ["COMPLIMENT", "INSULT"]

# Use a local Dolly model instead of an API
[components.llm.model]
@llm_models = "spacy.Dolly.v1"
# For better performance, use dolly-v2-12b instead
```

@@ -157,8 +162,9 @@ Now run:
```python
from spacy_llm.util import assemble

nlp = assemble("config.cfg")
doc = nlp("You look gorgeous!")
print(doc.cats)
# {"COMPLIMENT": 1.0, "INSULT": 0.0}
```

Note that Hugging Face will download the `"databricks/dolly-v2-3b"` model the
@@ -167,7 +173,126 @@ first time you use it. You can
by setting the environment variable `HF_HOME`. Also, you can upgrade the model
to `"databricks/dolly-v2-12b"` for better performance.

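If you want the downloaded weights cached somewhere other than the default Hugging Face cache directory, set `HF_HOME` before assembling the pipeline. A minimal sketch (the cache path is only an example):

```python
import os

from spacy_llm.util import assemble

# Store the Hugging Face download cache in a custom directory (illustrative path).
os.environ["HF_HOME"] = "/data/hf_cache"

nlp = assemble("config.cfg")
```
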
### Example 3: Add NER using a GPT-3 model from OpenAI {id="example-3"}

In previous versions of the `spacy-llm` `NER` and `SpanCat` tasks, you could configure
a zero-shot pipeline without any examples. The new `v3` tasks (`spacy.NER.v3` and
`spacy.SpanCat.v3`) use few-shot learning exclusively and require at least one prompt
example to be configured.

In our evaluations on well-known NER datasets, the old zero-shot prompt did not perform
well (# TODO: link to evaluation results?).

The new v3 task prompts are based on the [PromptNER](https://arxiv.org/abs/2305.15444)
paper and use chain-of-thought reasoning to improve the quality of the predictions. This
prompt should be considerably more accurate for most NER use cases, but it does require
a little more work upfront.

```
@misc{ashok2023promptner,
      title={PromptNER: Prompting For Named Entity Recognition},
      author={Dhananjay Ashok and Zachary C. Lipton},
      year={2023},
      eprint={2305.15444},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}
```

Create a config file `config.cfg` containing at least the following (or see the
full example
[here](https://github.com/explosion/spacy-llm/tree/main/usage_examples/ner_openai)):

```ini
[nlp]
lang = "en"
pipeline = ["llm"]

[components]

[components.llm]
factory = "llm"

[components.llm.task]
@llm_tasks = "spacy.NER.v3"
labels = ["DISH", "INGREDIENT", "EQUIPMENT"]
description = Entities are the names of food dishes,
    ingredients, and any kind of cooking equipment.
    Adjectives, verbs, adverbs are not entities.
    Pronouns are not entities.

[components.llm.task.label_definitions]
DISH = "Known food dishes, e.g. Lobster Ravioli, garlic bread"
INGREDIENT = "Individual parts of a food dish, including herbs and spices."
EQUIPMENT = "Any kind of cooking equipment. e.g. oven, cooking pot, grill"

[components.llm.task.examples]
@misc = "spacy.FewShotReader.v1"
path = "ner_examples.json"

[components.llm.model]
@llm_models = "spacy.GPT-3-5.v1"
```

```json
# ner_examples.json
[
  {
    "text": "You can't get a great chocolate flavor with carob.",
    "spans": [
      {
        "text": "chocolate",
        "is_entity": false,
        "label": "==NONE==",
        "reason": "is a flavor in this context, not an ingredient"
      },
      {
        "text": "carob",
        "is_entity": true,
        "label": "INGREDIENT",
        "reason": "is an ingredient to add chocolate flavor"
      }
    ]
  },
  {
    "text": "You can probably sand-blast it if it's an anodized aluminum pan",
    "spans": [
      {
        "text": "sand-blast",
        "is_entity": false,
        "label": "==NONE==",
        "reason": "is a cleaning technique, not some kind of equipment"
      },
      {
        "text": "anodized aluminum pan",
        "is_entity": true,
        "label": "EQUIPMENT",
        "reason": "is a piece of cooking equipment, anodized is included since it describes the type of pan"
      }
    ]
  }
]
```

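Because this example calls the OpenAI API, `spacy-llm` reads your credentials from the environment, so make sure your key is set before assembling the pipeline. A minimal sanity check, assuming the standard `OPENAI_API_KEY` variable:

```python
import os

# spacy-llm's OpenAI-backed models expect the API key in the environment.
assert "OPENAI_API_KEY" in os.environ, "Set OPENAI_API_KEY before running this example."
```
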
Now run:

```python
from spacy_llm.util import assemble

nlp = assemble("config.cfg")
doc = nlp(
    "Sriracha sauce goes really well with hoisin stir fry, "
    "but you should add it after you use the wok."
)
print([(ent.text, ent.label_) for ent in doc.ents])
# [('Sriracha sauce', 'INGREDIENT'),
#  ('hoisin stir fry', 'DISH'),
#  ('wok', 'EQUIPMENT')]
```

### Example 4: Create the component directly in Python {id="example-4"}

The `llm` component behaves as any other component does, so adding it to an
existing pipeline follows the same pattern:

@@ -180,16 +305,38 @@ nlp.add_pipe(
```python
    "llm",
    config={
        "task": {
            "@llm_tasks": "spacy.NER.v3",
            "labels": ["DISH", "INGREDIENT", "EQUIPMENT"],
            "examples": [
                {
                    "text": "You can't get a great chocolate flavor with carob.",
                    "spans": [
                        {
                            "text": "chocolate",
                            "is_entity": False,
                            "label": "==NONE==",
                            "reason": "is a flavor in this context, not an ingredient"
                        },
                        {
                            "text": "carob",
                            "is_entity": True,
                            "label": "INGREDIENT",
                            "reason": "is an ingredient to add chocolate flavor"
                        }
                    ]
                },
            ],
        },
        "model": {
            "@llm_models": "spacy.GPT-3-5.v1"
        },
    },
)
nlp.initialize()
doc = nlp(
    "Sriracha sauce goes really well with hoisin stir fry, "
    "but you should add it after you use the wok."
)
print([(ent.text, ent.label_) for ent in doc.ents])
```

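If you prefer to keep the few-shot examples in the `ner_examples.json` file from Example 3 rather than inlining them, you can parse the file yourself and pass the resulting list into the config. A minimal sketch, assuming `ner_examples.json` is a valid JSON file next to your script:

```python
import spacy
import srsly  # installed alongside spaCy; used here only to parse the JSON file

nlp = spacy.blank("en")

# Reuse the few-shot examples from Example 3 instead of inlining them.
examples = srsly.read_json("ner_examples.json")

nlp.add_pipe(
    "llm",
    config={
        "task": {
            "@llm_tasks": "spacy.NER.v3",
            "labels": ["DISH", "INGREDIENT", "EQUIPMENT"],
            "examples": examples,
        },
        "model": {"@llm_models": "spacy.GPT-3-5.v1"},
    },
)
nlp.initialize()
```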