mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-02 02:43:36 +03:00
correct silly typo
This commit is contained in:
parent
f6c451b650
commit
1a151b10d6
|
@ -148,7 +148,7 @@ def generate_sentence(example_dict, has_ner_tags, tag_pattern, ner_map=None):
|
||||||
sentence = {}
|
sentence = {}
|
||||||
tokens = []
|
tokens = []
|
||||||
token_annotation = example_dict["token_annotation"]
|
token_annotation = example_dict["token_annotation"]
|
||||||
for i, id_ in enumerate(["ids"]):
|
for i, id_ in enumerate(token_annotation["ids"]):
|
||||||
token = {}
|
token = {}
|
||||||
token["id"] = id_
|
token["id"] = id_
|
||||||
token["orth"] = token_annotation["words"][i]
|
token["orth"] = token_annotation["words"][i]
|
||||||
|
|
|
@ -485,12 +485,12 @@ def _compile_gold(examples, pipeline, nlp):
|
||||||
"texts": set(),
|
"texts": set(),
|
||||||
}
|
}
|
||||||
for example in examples:
|
for example in examples:
|
||||||
gold = example.gold
|
gold = example.reference
|
||||||
doc = example.doc
|
doc = example.predicted
|
||||||
valid_words = [x for x in gold.words if x is not None]
|
valid_words = [x for x in gold if x is not None]
|
||||||
data["words"].update(valid_words)
|
data["words"].update(valid_words)
|
||||||
data["n_words"] += len(valid_words)
|
data["n_words"] += len(valid_words)
|
||||||
data["n_misaligned_words"] += len(gold.words) - len(valid_words)
|
data["n_misaligned_words"] += len(gold) - len(valid_words)
|
||||||
data["texts"].add(doc.text)
|
data["texts"].add(doc.text)
|
||||||
if len(nlp.vocab.vectors):
|
if len(nlp.vocab.vectors):
|
||||||
for word in valid_words:
|
for word in valid_words:
|
||||||
|
@ -545,10 +545,10 @@ def _format_labels(labels, counts=False):
|
||||||
|
|
||||||
def _get_examples_without_label(data, label):
|
def _get_examples_without_label(data, label):
|
||||||
count = 0
|
count = 0
|
||||||
for ex in data:
|
for eg in data:
|
||||||
labels = [
|
labels = [
|
||||||
label.split("-")[1]
|
label.split("-")[1]
|
||||||
for label in ex.gold.ner
|
for label in eg.gold.ner
|
||||||
if label not in ("O", "-", None)
|
if label not in ("O", "-", None)
|
||||||
]
|
]
|
||||||
if label not in labels:
|
if label not in labels:
|
||||||
|
|
|
@ -56,8 +56,10 @@ cdef class Example:
|
||||||
if "ORTH" not in tok_dict:
|
if "ORTH" not in tok_dict:
|
||||||
tok_dict["ORTH"] = [tok.text for tok in predicted]
|
tok_dict["ORTH"] = [tok.text for tok in predicted]
|
||||||
tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
|
tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
|
||||||
|
print("added ORTH and SPACY to the tok_dict")
|
||||||
if "SPACY" not in tok_dict:
|
if "SPACY" not in tok_dict:
|
||||||
tok_dict["SPACY"] = None
|
tok_dict["SPACY"] = None
|
||||||
|
print("added SPACY to the tok_dict")
|
||||||
return Example(
|
return Example(
|
||||||
predicted,
|
predicted,
|
||||||
annotations2doc(predicted.vocab, tok_dict, doc_dict)
|
annotations2doc(predicted.vocab, tok_dict, doc_dict)
|
||||||
|
@ -75,13 +77,15 @@ cdef class Example:
|
||||||
|
|
||||||
def get_aligned(self, field, as_string=False):
|
def get_aligned(self, field, as_string=False):
|
||||||
"""Return an aligned array for a token attribute."""
|
"""Return an aligned array for a token attribute."""
|
||||||
# TODO: This is probably wrong. I just bashed this out and there's probably
|
|
||||||
# all sorts of edge-cases.
|
|
||||||
alignment = self.alignment
|
alignment = self.alignment
|
||||||
i2j_multi = alignment.i2j_multi
|
i2j_multi = alignment.i2j_multi
|
||||||
j2i_multi = alignment.j2i_multi
|
j2i_multi = alignment.j2i_multi
|
||||||
gold_to_cand = alignment.gold_to_cand
|
gold_to_cand = alignment.gold_to_cand
|
||||||
cand_to_gold = alignment.cand_to_gold
|
cand_to_gold = alignment.cand_to_gold
|
||||||
|
print("i2j_multi", i2j_multi)
|
||||||
|
print("j2i_multi", j2i_multi)
|
||||||
|
print("gold_to_cand", gold_to_cand)
|
||||||
|
print("cand_to_gold", cand_to_gold)
|
||||||
|
|
||||||
vocab = self.reference.vocab
|
vocab = self.reference.vocab
|
||||||
gold_values = self.reference.to_array([field])
|
gold_values = self.reference.to_array([field])
|
||||||
|
@ -97,6 +101,7 @@ cdef class Example:
|
||||||
else:
|
else:
|
||||||
output[i] = gold_values[gold_i]
|
output[i] = gold_values[gold_i]
|
||||||
|
|
||||||
|
print("output before:" , output)
|
||||||
if field in ["ENT_IOB"]:
|
if field in ["ENT_IOB"]:
|
||||||
# Fix many-to-one IOB codes
|
# Fix many-to-one IOB codes
|
||||||
prev_j = -1
|
prev_j = -1
|
||||||
|
@ -111,17 +116,23 @@ cdef class Example:
|
||||||
prev_j = -1
|
prev_j = -1
|
||||||
prev_value = value
|
prev_value = value
|
||||||
|
|
||||||
|
print("output in between:" , output)
|
||||||
if field in ["ENT_IOB", "ENT_TYPE"]:
|
if field in ["ENT_IOB", "ENT_TYPE"]:
|
||||||
# Assign one-to-many NER tags
|
# Assign one-to-many NER tags
|
||||||
for j, cand_j in enumerate(gold_to_cand):
|
for j, cand_j in enumerate(gold_to_cand):
|
||||||
|
print()
|
||||||
|
print("j", j)
|
||||||
|
print("cand_j", cand_j)
|
||||||
if cand_j is None:
|
if cand_j is None:
|
||||||
if j in j2i_multi:
|
if j in j2i_multi:
|
||||||
i = j2i_multi[j]
|
i = j2i_multi[j]
|
||||||
if output[i] is None:
|
if output[i] is None:
|
||||||
output[i] = gold_values[j]
|
output[i] = gold_values[j]
|
||||||
|
|
||||||
|
print("output final:" , output)
|
||||||
if as_string:
|
if as_string:
|
||||||
output = [vocab.strings[o] if o is not None else o for o in output]
|
output = [vocab.strings[o] if o is not None else o for o in output]
|
||||||
|
print("output as string:" , output)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
def to_dict(self):
|
def to_dict(self):
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
import pytest
|
import pytest
|
||||||
from thinc.api import Adam, NumpyOps
|
from thinc.api import Adam, NumpyOps
|
||||||
from spacy.attrs import NORM
|
from spacy.attrs import NORM
|
||||||
from spacy.gold import GoldParse
|
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
|
|
||||||
from spacy.pipeline.defaults import default_parser, default_ner
|
from spacy.pipeline.defaults import default_parser, default_ner
|
||||||
|
|
|
@ -4,7 +4,6 @@ from spacy.vocab import Vocab
|
||||||
from spacy.pipeline.defaults import default_parser
|
from spacy.pipeline.defaults import default_parser
|
||||||
from spacy.pipeline import DependencyParser
|
from spacy.pipeline import DependencyParser
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc
|
||||||
from spacy.gold import GoldParse
|
|
||||||
from spacy.syntax.nonproj import projectivize
|
from spacy.syntax.nonproj import projectivize
|
||||||
from spacy.syntax.stateclass import StateClass
|
from spacy.syntax.stateclass import StateClass
|
||||||
from spacy.syntax.arc_eager import ArcEager
|
from spacy.syntax.arc_eager import ArcEager
|
||||||
|
|
|
@ -5,7 +5,6 @@ from spacy.pipeline.defaults import default_ner
|
||||||
from spacy.pipeline import EntityRecognizer, EntityRuler
|
from spacy.pipeline import EntityRecognizer, EntityRuler
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
from spacy.syntax.ner import BiluoPushDown
|
from spacy.syntax.ner import BiluoPushDown
|
||||||
from spacy.gold import GoldParse
|
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc
|
||||||
|
|
||||||
from ..util import make_tempdir
|
from ..util import make_tempdir
|
||||||
|
|
|
@ -4,7 +4,6 @@ from spacy.vocab import Vocab
|
||||||
from spacy.syntax.arc_eager import ArcEager
|
from spacy.syntax.arc_eager import ArcEager
|
||||||
from spacy.syntax.nn_parser import Parser
|
from spacy.syntax.nn_parser import Parser
|
||||||
from spacy.tokens.doc import Doc
|
from spacy.tokens.doc import Doc
|
||||||
from spacy.gold import GoldParse
|
|
||||||
from thinc.api import Model
|
from thinc.api import Model
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -6,9 +6,7 @@ from spacy.pipeline.defaults import default_parser
|
||||||
from spacy.pipeline import DependencyParser
|
from spacy.pipeline import DependencyParser
|
||||||
from spacy.syntax.arc_eager import ArcEager
|
from spacy.syntax.arc_eager import ArcEager
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc
|
||||||
from spacy.syntax._beam_utils import ParserBeam
|
|
||||||
from spacy.syntax.stateclass import StateClass
|
from spacy.syntax.stateclass import StateClass
|
||||||
from spacy.gold import GoldParse
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|
|
@ -7,7 +7,6 @@ from spacy.lang.en import English
|
||||||
from spacy.language import Language
|
from spacy.language import Language
|
||||||
from spacy.pipeline import TextCategorizer
|
from spacy.pipeline import TextCategorizer
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc
|
||||||
from spacy.gold import GoldParse
|
|
||||||
from spacy.util import fix_random_seed
|
from spacy.util import fix_random_seed
|
||||||
|
|
||||||
from ..util import make_tempdir
|
from ..util import make_tempdir
|
||||||
|
|
|
@ -3,7 +3,7 @@ import gc
|
||||||
import numpy
|
import numpy
|
||||||
import copy
|
import copy
|
||||||
|
|
||||||
from spacy.gold import Example, TokenAnnotation
|
from spacy.gold import Example
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.lang.en.stop_words import STOP_WORDS
|
from spacy.lang.en.stop_words import STOP_WORDS
|
||||||
from spacy.lang.lex_attrs import is_stop
|
from spacy.lang.lex_attrs import is_stop
|
||||||
|
@ -268,20 +268,21 @@ def test_issue1963(en_tokenizer):
|
||||||
assert doc.tensor.shape == (3, 128)
|
assert doc.tensor.shape == (3, 128)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: fix
|
||||||
@pytest.mark.parametrize("label", ["U-JOB-NAME"])
|
@pytest.mark.parametrize("label", ["U-JOB-NAME"])
|
||||||
def test_issue1967(label):
|
def test_issue1967(label):
|
||||||
config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
|
config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
|
||||||
ner = EntityRecognizer(Vocab(), default_ner(), **config)
|
ner = EntityRecognizer(Vocab(), default_ner(), **config)
|
||||||
example = Example(
|
example = Example.from_dict(
|
||||||
doc=Doc(ner.vocab, words=["word"]),
|
Doc(ner.vocab, words=["word"]),
|
||||||
token_annotation=TokenAnnotation(
|
{
|
||||||
ids=[0],
|
"ids": [0],
|
||||||
words=["word"],
|
"words": ["word"],
|
||||||
tags=["tag"],
|
"tags": ["tag"],
|
||||||
heads=[0],
|
"heads": [0],
|
||||||
deps=["dep"],
|
"deps": ["dep"],
|
||||||
entities=[label]
|
"entities": [label]
|
||||||
)
|
}
|
||||||
)
|
)
|
||||||
ner.moves.get_actions(gold_parses=[example])
|
ner.moves.get_actions(gold_parses=[example])
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import pytest
|
import pytest
|
||||||
from spacy.gold import GoldParse
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user