mirror of https://github.com/explosion/spaCy.git synced 2025-04-15 14:42:00 +03:00

correct silly typo

svlandeg 2020-06-17 14:48:14 +02:00
parent f6c451b650
commit 1a151b10d6
11 changed files with 32 additions and 28 deletions


@@ -148,7 +148,7 @@ def generate_sentence(example_dict, has_ner_tags, tag_pattern, ner_map=None):
sentence = {}
tokens = []
token_annotation = example_dict["token_annotation"]
for i, id_ in enumerate(["ids"]):
for i, id_ in enumerate(token_annotation["ids"]):
token = {}
token["id"] = id_
token["orth"] = token_annotation["words"][i]

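The "silly typo" fixed above is easy to miss: enumerate(["ids"]) iterates over the literal one-element list ["ids"], so the loop body runs exactly once with id_ == "ids" rather than once per token id. A standalone illustration in plain Python (data invented, not from the repo):

token_annotation = {"ids": [0, 1, 2], "words": ["This", "is", "fine"]}

# Buggy form: enumerates the literal list ["ids"], giving a single iteration.
assert list(enumerate(["ids"])) == [(0, "ids")]

# Fixed form: enumerates the actual token ids from the annotation dict.
assert list(enumerate(token_annotation["ids"])) == [(0, 0), (1, 1), (2, 2)]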

@@ -485,12 +485,12 @@ def _compile_gold(examples, pipeline, nlp):
"texts": set(),
}
for example in examples:
gold = example.gold
doc = example.doc
valid_words = [x for x in gold.words if x is not None]
gold = example.reference
doc = example.predicted
valid_words = [x for x in gold if x is not None]
data["words"].update(valid_words)
data["n_words"] += len(valid_words)
data["n_misaligned_words"] += len(gold.words) - len(valid_words)
data["n_misaligned_words"] += len(gold) - len(valid_words)
data["texts"].add(doc.text)
if len(nlp.vocab.vectors):
for word in valid_words:
@@ -545,10 +545,10 @@ def _format_labels(labels, counts=False):
def _get_examples_without_label(data, label):
count = 0
for ex in data:
for eg in data:
labels = [
label.split("-")[1]
for label in ex.gold.ner
for label in eg.gold.ner
if label not in ("O", "-", None)
]
if label not in labels:

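For orientation: in this branch an Example pairs two Doc objects, predicted and reference, instead of a Doc plus a GoldParse, which is why the stats code above now reads example.reference and example.predicted. A minimal sketch of that pairing, using the two-argument constructor visible further down in this diff; the import path follows the test files below, and the words are invented:

from spacy.gold import Example
from spacy.tokens import Doc
from spacy.vocab import Vocab

vocab = Vocab()
predicted = Doc(vocab, words=["New", "York", "is", "big"])
reference = Doc(vocab, words=["New", "York", "is", "big"])

# An Example holds the pipeline-facing Doc and the gold-standard reference Doc.
example = Example(predicted, reference)

# The gold side is just another Doc, so iterating it yields Token objects.
assert [token.text for token in example.reference] == ["New", "York", "is", "big"]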

@@ -56,8 +56,10 @@ cdef class Example:
if "ORTH" not in tok_dict:
tok_dict["ORTH"] = [tok.text for tok in predicted]
tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
print("added ORTH and SPACY to the tok_dict")
if "SPACY" not in tok_dict:
tok_dict["SPACY"] = None
print("added SPACY to the tok_dict")
return Example(
predicted,
annotations2doc(predicted.vocab, tok_dict, doc_dict)
@@ -75,13 +77,15 @@ cdef class Example:
def get_aligned(self, field, as_string=False):
"""Return an aligned array for a token attribute."""
# TODO: This is probably wrong. I just bashed this out and there's probably
# all sorts of edge-cases.
alignment = self.alignment
i2j_multi = alignment.i2j_multi
j2i_multi = alignment.j2i_multi
gold_to_cand = alignment.gold_to_cand
cand_to_gold = alignment.cand_to_gold
print("i2j_multi", i2j_multi)
print("j2i_multi", j2i_multi)
print("gold_to_cand", gold_to_cand)
print("cand_to_gold", cand_to_gold)
vocab = self.reference.vocab
gold_values = self.reference.to_array([field])
@@ -97,6 +101,7 @@ cdef class Example:
else:
output[i] = gold_values[gold_i]
print("output before:" , output)
if field in ["ENT_IOB"]:
# Fix many-to-one IOB codes
prev_j = -1
@@ -111,17 +116,23 @@ cdef class Example:
prev_j = -1
prev_value = value
print("output in between:" , output)
if field in ["ENT_IOB", "ENT_TYPE"]:
# Assign one-to-many NER tags
for j, cand_j in enumerate(gold_to_cand):
print()
print("j", j)
print("cand_j", cand_j)
if cand_j is None:
if j in j2i_multi:
i = j2i_multi[j]
if output[i] is None:
output[i] = gold_values[j]
print("output final:" , output)
if as_string:
output = [vocab.strings[o] if o is not None else o for o in output]
print("output as string:" , output)
return output
def to_dict(self):

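The get_aligned changes above are about projecting gold-standard values onto the predicted tokenization when the two tokenizations differ. A rough, self-contained illustration of the one-to-many case handled through j2i_multi and gold_to_cand; the variable names follow the diff, but the data is invented and the logic is simplified (in the real method this second pass is only applied to the NER fields):

# Candidate token 0 ("New York") spans gold tokens 0 and 1.
cand_words = ["New York", "is", "big"]
gold_words = ["New", "York", "is", "big"]
gold_values = ["PROPN", "PROPN", "AUX", "ADJ"]  # e.g. gold POS tags

cand_to_gold = [None, 2, 3]        # no single gold token matches candidate 0
gold_to_cand = [None, None, 1, 2]  # gold 0 and 1 have no one-to-one candidate
j2i_multi = {0: 0, 1: 0}           # ...but both fall inside candidate 0

# One-to-one alignments first, as at the top of get_aligned().
output = [None] * len(cand_words)
for i, gold_i in enumerate(cand_to_gold):
    if gold_i is not None:
        output[i] = gold_values[gold_i]

# Then fill unaligned gold tokens through the multi-token map, keeping only the
# first value that lands on a still-empty candidate slot.
for j, cand_j in enumerate(gold_to_cand):
    if cand_j is None and j in j2i_multi and output[j2i_multi[j]] is None:
        output[j2i_multi[j]] = gold_values[j]

assert output == ["PROPN", "AUX", "ADJ"]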

@@ -1,7 +1,6 @@
import pytest
from thinc.api import Adam, NumpyOps
from spacy.attrs import NORM
from spacy.gold import GoldParse
from spacy.vocab import Vocab
from spacy.pipeline.defaults import default_parser, default_ner


@@ -4,7 +4,6 @@ from spacy.vocab import Vocab
from spacy.pipeline.defaults import default_parser
from spacy.pipeline import DependencyParser
from spacy.tokens import Doc
from spacy.gold import GoldParse
from spacy.syntax.nonproj import projectivize
from spacy.syntax.stateclass import StateClass
from spacy.syntax.arc_eager import ArcEager


@@ -5,7 +5,6 @@ from spacy.pipeline.defaults import default_ner
from spacy.pipeline import EntityRecognizer, EntityRuler
from spacy.vocab import Vocab
from spacy.syntax.ner import BiluoPushDown
from spacy.gold import GoldParse
from spacy.tokens import Doc
from ..util import make_tempdir


@@ -4,7 +4,6 @@ from spacy.vocab import Vocab
from spacy.syntax.arc_eager import ArcEager
from spacy.syntax.nn_parser import Parser
from spacy.tokens.doc import Doc
from spacy.gold import GoldParse
from thinc.api import Model


@@ -6,9 +6,7 @@ from spacy.pipeline.defaults import default_parser
from spacy.pipeline import DependencyParser
from spacy.syntax.arc_eager import ArcEager
from spacy.tokens import Doc
from spacy.syntax._beam_utils import ParserBeam
from spacy.syntax.stateclass import StateClass
from spacy.gold import GoldParse
@pytest.fixture


@@ -7,7 +7,6 @@ from spacy.lang.en import English
from spacy.language import Language
from spacy.pipeline import TextCategorizer
from spacy.tokens import Doc
from spacy.gold import GoldParse
from spacy.util import fix_random_seed
from ..util import make_tempdir


@@ -3,7 +3,7 @@ import gc
import numpy
import copy
from spacy.gold import Example, TokenAnnotation
from spacy.gold import Example
from spacy.lang.en import English
from spacy.lang.en.stop_words import STOP_WORDS
from spacy.lang.lex_attrs import is_stop
@@ -268,20 +268,21 @@ def test_issue1963(en_tokenizer):
assert doc.tensor.shape == (3, 128)
# TODO: fix
@pytest.mark.parametrize("label", ["U-JOB-NAME"])
def test_issue1967(label):
config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
ner = EntityRecognizer(Vocab(), default_ner(), **config)
example = Example(
doc=Doc(ner.vocab, words=["word"]),
token_annotation=TokenAnnotation(
ids=[0],
words=["word"],
tags=["tag"],
heads=[0],
deps=["dep"],
entities=[label]
)
example = Example.from_dict(
Doc(ner.vocab, words=["word"]),
{
"ids": [0],
"words": ["word"],
"tags": ["tag"],
"heads": [0],
"deps": ["dep"],
"entities": [label]
}
)
ner.moves.get_actions(gold_parses=[example])

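The rewritten test above also shows the new construction path: gold annotations are passed as a plain dict to Example.from_dict instead of being wrapped in a TokenAnnotation object. A minimal sketch of what that produces, assuming partial annotation dicts are accepted here the way they are in released spaCy v3; the word and tag are invented:

from spacy.gold import Example
from spacy.tokens import Doc
from spacy.vocab import Vocab

vocab = Vocab()
predicted = Doc(vocab, words=["word"])

# The plain dict replaces the old TokenAnnotation object.
example = Example.from_dict(predicted, {"words": ["word"], "tags": ["NN"]})

# The annotations end up on the reference Doc rather than on a GoldParse.
assert example.reference[0].tag_ == "NN"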

@@ -1,5 +1,4 @@
import pytest
from spacy.gold import GoldParse
@pytest.mark.parametrize(