mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 09:44:36 +03:00
Auto-format code with black (#10377)
Co-authored-by: explosion-bot <explosion-bot@users.noreply.github.com>
This commit is contained in:
parent
5f568f7e41
commit
d637b34e2f
|
@@ -7,139 +7,182 @@ from spacy.training import Example
|
||||||
|
|
||||||
TRAIN_DATA = [
|
TRAIN_DATA = [
|
||||||
(
|
(
|
||||||
'Who is Kofi Annan?',
|
"Who is Kofi Annan?",
|
||||||
{
|
{
|
||||||
'entities': [(7, 18, 'PERSON')],
|
"entities": [(7, 18, "PERSON")],
|
||||||
'tags': ['PRON', 'AUX', 'PROPN', 'PRON', 'PUNCT'],
|
"tags": ["PRON", "AUX", "PROPN", "PRON", "PUNCT"],
|
||||||
'heads': [1, 1, 3, 1, 1],
|
"heads": [1, 1, 3, 1, 1],
|
||||||
'deps': ['attr', 'ROOT', 'compound', 'nsubj', 'punct'],
|
"deps": ["attr", "ROOT", "compound", "nsubj", "punct"],
|
||||||
'morphs': ['', 'Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin', 'Number=Sing', 'Number=Sing', 'PunctType=Peri'],
|
"morphs": [
|
||||||
'cats': {'question': 1.0}
|
"",
|
||||||
}
|
"Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin",
|
||||||
),
|
"Number=Sing",
|
||||||
(
|
"Number=Sing",
|
||||||
'Who is Steve Jobs?',
|
"PunctType=Peri",
|
||||||
{
|
],
|
||||||
'entities': [(7, 17, 'PERSON')],
|
"cats": {"question": 1.0},
|
||||||
'tags': ['PRON', 'AUX', 'PROPN', 'PRON', 'PUNCT'],
|
|
||||||
'heads': [1, 1, 3, 1, 1],
|
|
||||||
'deps': ['attr', 'ROOT', 'compound', 'nsubj', 'punct'],
|
|
||||||
'morphs': ['', 'Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin', 'Number=Sing', 'Number=Sing', 'PunctType=Peri'],
|
|
||||||
'cats': {'question': 1.0}
|
|
||||||
}
|
|
||||||
),
|
|
||||||
(
|
|
||||||
'Bob is a nice person.',
|
|
||||||
{
|
|
||||||
'entities': [(0, 3, 'PERSON')],
|
|
||||||
'tags': ['PROPN', 'AUX', 'DET', 'ADJ', 'NOUN', 'PUNCT'],
|
|
||||||
'heads': [1, 1, 4, 4, 1, 1],
|
|
||||||
'deps': ['nsubj', 'ROOT', 'det', 'amod', 'attr', 'punct'],
|
|
||||||
'morphs': ['Number=Sing', 'Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin', 'Definite=Ind|PronType=Art', 'Degree=Pos', 'Number=Sing', 'PunctType=Peri'],
|
|
||||||
'cats': {'statement': 1.0}
|
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
'Hi Anil, how are you?',
|
"Who is Steve Jobs?",
|
||||||
{
|
{
|
||||||
'entities': [(3, 7, 'PERSON')],
|
"entities": [(7, 17, "PERSON")],
|
||||||
'tags': ['INTJ', 'PROPN', 'PUNCT', 'ADV', 'AUX', 'PRON', 'PUNCT'],
|
"tags": ["PRON", "AUX", "PROPN", "PRON", "PUNCT"],
|
||||||
'deps': ['intj', 'npadvmod', 'punct', 'advmod', 'ROOT', 'nsubj', 'punct'],
|
"heads": [1, 1, 3, 1, 1],
|
||||||
'heads': [4, 0, 4, 4, 4, 4, 4],
|
"deps": ["attr", "ROOT", "compound", "nsubj", "punct"],
|
||||||
'morphs': ['', 'Number=Sing', 'PunctType=Comm', '', 'Mood=Ind|Tense=Pres|VerbForm=Fin', 'Case=Nom|Person=2|PronType=Prs', 'PunctType=Peri'],
|
"morphs": [
|
||||||
'cats': {'greeting': 1.0, 'question': 1.0}
|
"",
|
||||||
}
|
"Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin",
|
||||||
|
"Number=Sing",
|
||||||
|
"Number=Sing",
|
||||||
|
"PunctType=Peri",
|
||||||
|
],
|
||||||
|
"cats": {"question": 1.0},
|
||||||
|
},
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
'I like London and Berlin.',
|
"Bob is a nice person.",
|
||||||
{
|
{
|
||||||
'entities': [(7, 13, 'LOC'), (18, 24, 'LOC')],
|
"entities": [(0, 3, "PERSON")],
|
||||||
'tags': ['PROPN', 'VERB', 'PROPN', 'CCONJ', 'PROPN', 'PUNCT'],
|
"tags": ["PROPN", "AUX", "DET", "ADJ", "NOUN", "PUNCT"],
|
||||||
'deps': ['nsubj', 'ROOT', 'dobj', 'cc', 'conj', 'punct'],
|
"heads": [1, 1, 4, 4, 1, 1],
|
||||||
'heads': [1, 1, 1, 2, 2, 1],
|
"deps": ["nsubj", "ROOT", "det", "amod", "attr", "punct"],
|
||||||
'morphs': ['Case=Nom|Number=Sing|Person=1|PronType=Prs', 'Tense=Pres|VerbForm=Fin', 'Number=Sing', 'ConjType=Cmp', 'Number=Sing', 'PunctType=Peri'],
|
"morphs": [
|
||||||
'cats': {'statement': 1.0}
|
"Number=Sing",
|
||||||
}
|
"Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin",
|
||||||
)
|
"Definite=Ind|PronType=Art",
|
||||||
|
"Degree=Pos",
|
||||||
|
"Number=Sing",
|
||||||
|
"PunctType=Peri",
|
||||||
|
],
|
||||||
|
"cats": {"statement": 1.0},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Hi Anil, how are you?",
|
||||||
|
{
|
||||||
|
"entities": [(3, 7, "PERSON")],
|
||||||
|
"tags": ["INTJ", "PROPN", "PUNCT", "ADV", "AUX", "PRON", "PUNCT"],
|
||||||
|
"deps": ["intj", "npadvmod", "punct", "advmod", "ROOT", "nsubj", "punct"],
|
||||||
|
"heads": [4, 0, 4, 4, 4, 4, 4],
|
||||||
|
"morphs": [
|
||||||
|
"",
|
||||||
|
"Number=Sing",
|
||||||
|
"PunctType=Comm",
|
||||||
|
"",
|
||||||
|
"Mood=Ind|Tense=Pres|VerbForm=Fin",
|
||||||
|
"Case=Nom|Person=2|PronType=Prs",
|
||||||
|
"PunctType=Peri",
|
||||||
|
],
|
||||||
|
"cats": {"greeting": 1.0, "question": 1.0},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"I like London and Berlin.",
|
||||||
|
{
|
||||||
|
"entities": [(7, 13, "LOC"), (18, 24, "LOC")],
|
||||||
|
"tags": ["PROPN", "VERB", "PROPN", "CCONJ", "PROPN", "PUNCT"],
|
||||||
|
"deps": ["nsubj", "ROOT", "dobj", "cc", "conj", "punct"],
|
||||||
|
"heads": [1, 1, 1, 2, 2, 1],
|
||||||
|
"morphs": [
|
||||||
|
"Case=Nom|Number=Sing|Person=1|PronType=Prs",
|
||||||
|
"Tense=Pres|VerbForm=Fin",
|
||||||
|
"Number=Sing",
|
||||||
|
"ConjType=Cmp",
|
||||||
|
"Number=Sing",
|
||||||
|
"PunctType=Peri",
|
||||||
|
],
|
||||||
|
"cats": {"statement": 1.0},
|
||||||
|
},
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
REHEARSE_DATA = [
|
REHEARSE_DATA = [
|
||||||
(
|
(
|
||||||
'Hi Anil',
|
"Hi Anil",
|
||||||
{
|
{
|
||||||
'entities': [(3, 7, 'PERSON')],
|
"entities": [(3, 7, "PERSON")],
|
||||||
'tags': ['INTJ', 'PROPN'],
|
"tags": ["INTJ", "PROPN"],
|
||||||
'deps': ['ROOT', 'npadvmod'],
|
"deps": ["ROOT", "npadvmod"],
|
||||||
'heads': [0, 0],
|
"heads": [0, 0],
|
||||||
'morphs': ['', 'Number=Sing'],
|
"morphs": ["", "Number=Sing"],
|
||||||
'cats': {'greeting': 1.0}
|
"cats": {"greeting": 1.0},
|
||||||
}
|
},
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
'Hi Ravish, how you doing?',
|
"Hi Ravish, how you doing?",
|
||||||
{
|
{
|
||||||
'entities': [(3, 9, 'PERSON')],
|
"entities": [(3, 9, "PERSON")],
|
||||||
'tags': ['INTJ', 'PROPN', 'PUNCT', 'ADV', 'AUX', 'PRON', 'PUNCT'],
|
"tags": ["INTJ", "PROPN", "PUNCT", "ADV", "AUX", "PRON", "PUNCT"],
|
||||||
'deps': ['intj', 'ROOT', 'punct', 'advmod', 'nsubj', 'advcl', 'punct'],
|
"deps": ["intj", "ROOT", "punct", "advmod", "nsubj", "advcl", "punct"],
|
||||||
'heads': [1, 1, 1, 5, 5, 1, 1],
|
"heads": [1, 1, 1, 5, 5, 1, 1],
|
||||||
'morphs': ['', 'VerbForm=Inf', 'PunctType=Comm', '', 'Case=Nom|Person=2|PronType=Prs', 'Aspect=Prog|Tense=Pres|VerbForm=Part', 'PunctType=Peri'],
|
"morphs": [
|
||||||
'cats': {'greeting': 1.0, 'question': 1.0}
|
"",
|
||||||
}
|
"VerbForm=Inf",
|
||||||
|
"PunctType=Comm",
|
||||||
|
"",
|
||||||
|
"Case=Nom|Person=2|PronType=Prs",
|
||||||
|
"Aspect=Prog|Tense=Pres|VerbForm=Part",
|
||||||
|
"PunctType=Peri",
|
||||||
|
],
|
||||||
|
"cats": {"greeting": 1.0, "question": 1.0},
|
||||||
|
},
|
||||||
),
|
),
|
||||||
# UTENSIL new label
|
# UTENSIL new label
|
||||||
(
|
(
|
||||||
'Natasha bought new forks.',
|
"Natasha bought new forks.",
|
||||||
{
|
{
|
||||||
'entities': [(0, 7, 'PERSON'), (19, 24, 'UTENSIL')],
|
"entities": [(0, 7, "PERSON"), (19, 24, "UTENSIL")],
|
||||||
'tags': ['PROPN', 'VERB', 'ADJ', 'NOUN', 'PUNCT'],
|
"tags": ["PROPN", "VERB", "ADJ", "NOUN", "PUNCT"],
|
||||||
'deps': ['nsubj', 'ROOT', 'amod', 'dobj', 'punct'],
|
"deps": ["nsubj", "ROOT", "amod", "dobj", "punct"],
|
||||||
'heads': [1, 1, 3, 1, 1],
|
"heads": [1, 1, 3, 1, 1],
|
||||||
'morphs': ['Number=Sing', 'Tense=Past|VerbForm=Fin', 'Degree=Pos', 'Number=Plur', 'PunctType=Peri'],
|
"morphs": [
|
||||||
'cats': {'statement': 1.0}
|
"Number=Sing",
|
||||||
}
|
"Tense=Past|VerbForm=Fin",
|
||||||
)
|
"Degree=Pos",
|
||||||
|
"Number=Plur",
|
||||||
|
"PunctType=Peri",
|
||||||
|
],
|
||||||
|
"cats": {"statement": 1.0},
|
||||||
|
},
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def _add_ner_label(ner, data):
|
def _add_ner_label(ner, data):
|
||||||
for _, annotations in data:
|
for _, annotations in data:
|
||||||
for ent in annotations['entities']:
|
for ent in annotations["entities"]:
|
||||||
ner.add_label(ent[2])
|
ner.add_label(ent[2])
|
||||||
|
|
||||||
|
|
||||||
def _add_tagger_label(tagger, data):
|
def _add_tagger_label(tagger, data):
|
||||||
for _, annotations in data:
|
for _, annotations in data:
|
||||||
for tag in annotations['tags']:
|
for tag in annotations["tags"]:
|
||||||
tagger.add_label(tag)
|
tagger.add_label(tag)
|
||||||
|
|
||||||
|
|
||||||
def _add_parser_label(parser, data):
|
def _add_parser_label(parser, data):
|
||||||
for _, annotations in data:
|
for _, annotations in data:
|
||||||
for dep in annotations['deps']:
|
for dep in annotations["deps"]:
|
||||||
parser.add_label(dep)
|
parser.add_label(dep)
|
||||||
|
|
||||||
|
|
||||||
def _add_textcat_label(textcat, data):
|
def _add_textcat_label(textcat, data):
|
||||||
for _, annotations in data:
|
for _, annotations in data:
|
||||||
for cat in annotations['cats']:
|
for cat in annotations["cats"]:
|
||||||
textcat.add_label(cat)
|
textcat.add_label(cat)
|
||||||
|
|
||||||
|
|
||||||
def _optimize(
|
def _optimize(nlp, component: str, data: List, rehearse: bool):
|
||||||
nlp,
|
|
||||||
component: str,
|
|
||||||
data: List,
|
|
||||||
rehearse: bool
|
|
||||||
):
|
|
||||||
"""Run either train or rehearse."""
|
"""Run either train or rehearse."""
|
||||||
pipe = nlp.get_pipe(component)
|
pipe = nlp.get_pipe(component)
|
||||||
if component == 'ner':
|
if component == "ner":
|
||||||
_add_ner_label(pipe, data)
|
_add_ner_label(pipe, data)
|
||||||
elif component == 'tagger':
|
elif component == "tagger":
|
||||||
_add_tagger_label(pipe, data)
|
_add_tagger_label(pipe, data)
|
||||||
elif component == 'parser':
|
elif component == "parser":
|
||||||
_add_tagger_label(pipe, data)
|
_add_tagger_label(pipe, data)
|
||||||
elif component == 'textcat_multilabel':
|
elif component == "textcat_multilabel":
|
||||||
_add_textcat_label(pipe, data)
|
_add_textcat_label(pipe, data)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
@@ -160,7 +203,7 @@ def _optimize(
|
||||||
return nlp
|
return nlp
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("component", ['ner', 'tagger', 'parser', 'textcat_multilabel'])
|
@pytest.mark.parametrize("component", ["ner", "tagger", "parser", "textcat_multilabel"])
|
||||||
def test_rehearse(component):
|
def test_rehearse(component):
|
||||||
nlp = spacy.blank("en")
|
nlp = spacy.blank("en")
|
||||||
nlp.add_pipe(component)
|
nlp.add_pipe(component)
|
||||||
|
|
|
@@ -253,7 +253,9 @@ def conllu_sentence_to_doc(
|
||||||
heads=heads,
|
heads=heads,
|
||||||
)
|
)
|
||||||
if set_ents:
|
if set_ents:
|
||||||
doc_x.ents = [Span(doc_x, ent.start, ent.end, label=ent.label) for ent in doc.ents]
|
doc_x.ents = [
|
||||||
|
Span(doc_x, ent.start, ent.end, label=ent.label) for ent in doc.ents
|
||||||
|
]
|
||||||
|
|
||||||
return doc_x
|
return doc_x
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user