mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-15 14:42:00 +03:00
correct silly typo
This commit is contained in:
parent
f6c451b650
commit
1a151b10d6
spacy
cli
gold
tests
|
@@ -148,7 +148,7 @@ def generate_sentence(example_dict, has_ner_tags, tag_pattern, ner_map=None):
|
|||
sentence = {}
|
||||
tokens = []
|
||||
token_annotation = example_dict["token_annotation"]
|
||||
for i, id_ in enumerate(["ids"]):
|
||||
for i, id_ in enumerate(token_annotation["ids"]):
|
||||
token = {}
|
||||
token["id"] = id_
|
||||
token["orth"] = token_annotation["words"][i]
|
||||
|
|
|
@@ -485,12 +485,12 @@ def _compile_gold(examples, pipeline, nlp):
|
|||
"texts": set(),
|
||||
}
|
||||
for example in examples:
|
||||
gold = example.gold
|
||||
doc = example.doc
|
||||
valid_words = [x for x in gold.words if x is not None]
|
||||
gold = example.reference
|
||||
doc = example.predicted
|
||||
valid_words = [x for x in gold if x is not None]
|
||||
data["words"].update(valid_words)
|
||||
data["n_words"] += len(valid_words)
|
||||
data["n_misaligned_words"] += len(gold.words) - len(valid_words)
|
||||
data["n_misaligned_words"] += len(gold) - len(valid_words)
|
||||
data["texts"].add(doc.text)
|
||||
if len(nlp.vocab.vectors):
|
||||
for word in valid_words:
|
||||
|
@@ -545,10 +545,10 @@ def _format_labels(labels, counts=False):
|
|||
|
||||
def _get_examples_without_label(data, label):
|
||||
count = 0
|
||||
for ex in data:
|
||||
for eg in data:
|
||||
labels = [
|
||||
label.split("-")[1]
|
||||
for label in ex.gold.ner
|
||||
for label in eg.gold.ner
|
||||
if label not in ("O", "-", None)
|
||||
]
|
||||
if label not in labels:
|
||||
|
|
|
@@ -56,8 +56,10 @@ cdef class Example:
|
|||
if "ORTH" not in tok_dict:
|
||||
tok_dict["ORTH"] = [tok.text for tok in predicted]
|
||||
tok_dict["SPACY"] = [tok.whitespace_ for tok in predicted]
|
||||
print("added ORTH and SPACY to the tok_dict")
|
||||
if "SPACY" not in tok_dict:
|
||||
tok_dict["SPACY"] = None
|
||||
print("added SPACY to the tok_dict")
|
||||
return Example(
|
||||
predicted,
|
||||
annotations2doc(predicted.vocab, tok_dict, doc_dict)
|
||||
|
@@ -75,13 +77,15 @@ cdef class Example:
|
|||
|
||||
def get_aligned(self, field, as_string=False):
|
||||
"""Return an aligned array for a token attribute."""
|
||||
# TODO: This is probably wrong. I just bashed this out and there's probably
|
||||
# all sorts of edge-cases.
|
||||
alignment = self.alignment
|
||||
i2j_multi = alignment.i2j_multi
|
||||
j2i_multi = alignment.j2i_multi
|
||||
gold_to_cand = alignment.gold_to_cand
|
||||
cand_to_gold = alignment.cand_to_gold
|
||||
print("i2j_multi", i2j_multi)
|
||||
print("j2i_multi", j2i_multi)
|
||||
print("gold_to_cand", gold_to_cand)
|
||||
print("cand_to_gold", cand_to_gold)
|
||||
|
||||
vocab = self.reference.vocab
|
||||
gold_values = self.reference.to_array([field])
|
||||
|
@@ -97,6 +101,7 @@ cdef class Example:
|
|||
else:
|
||||
output[i] = gold_values[gold_i]
|
||||
|
||||
print("output before:" , output)
|
||||
if field in ["ENT_IOB"]:
|
||||
# Fix many-to-one IOB codes
|
||||
prev_j = -1
|
||||
|
@@ -111,17 +116,23 @@ cdef class Example:
|
|||
prev_j = -1
|
||||
prev_value = value
|
||||
|
||||
print("output in between:" , output)
|
||||
if field in ["ENT_IOB", "ENT_TYPE"]:
|
||||
# Assign one-to-many NER tags
|
||||
for j, cand_j in enumerate(gold_to_cand):
|
||||
print()
|
||||
print("j", j)
|
||||
print("cand_j", cand_j)
|
||||
if cand_j is None:
|
||||
if j in j2i_multi:
|
||||
i = j2i_multi[j]
|
||||
if output[i] is None:
|
||||
output[i] = gold_values[j]
|
||||
|
||||
print("output final:" , output)
|
||||
if as_string:
|
||||
output = [vocab.strings[o] if o is not None else o for o in output]
|
||||
print("output as string:" , output)
|
||||
return output
|
||||
|
||||
def to_dict(self):
|
||||
|
|
|
@@ -1,7 +1,6 @@
|
|||
import pytest
|
||||
from thinc.api import Adam, NumpyOps
|
||||
from spacy.attrs import NORM
|
||||
from spacy.gold import GoldParse
|
||||
from spacy.vocab import Vocab
|
||||
|
||||
from spacy.pipeline.defaults import default_parser, default_ner
|
||||
|
|
|
@@ -4,7 +4,6 @@ from spacy.vocab import Vocab
|
|||
from spacy.pipeline.defaults import default_parser
|
||||
from spacy.pipeline import DependencyParser
|
||||
from spacy.tokens import Doc
|
||||
from spacy.gold import GoldParse
|
||||
from spacy.syntax.nonproj import projectivize
|
||||
from spacy.syntax.stateclass import StateClass
|
||||
from spacy.syntax.arc_eager import ArcEager
|
||||
|
|
|
@@ -5,7 +5,6 @@ from spacy.pipeline.defaults import default_ner
|
|||
from spacy.pipeline import EntityRecognizer, EntityRuler
|
||||
from spacy.vocab import Vocab
|
||||
from spacy.syntax.ner import BiluoPushDown
|
||||
from spacy.gold import GoldParse
|
||||
from spacy.tokens import Doc
|
||||
|
||||
from ..util import make_tempdir
|
||||
|
|
|
@@ -4,7 +4,6 @@ from spacy.vocab import Vocab
|
|||
from spacy.syntax.arc_eager import ArcEager
|
||||
from spacy.syntax.nn_parser import Parser
|
||||
from spacy.tokens.doc import Doc
|
||||
from spacy.gold import GoldParse
|
||||
from thinc.api import Model
|
||||
|
||||
|
||||
|
|
|
@@ -6,9 +6,7 @@ from spacy.pipeline.defaults import default_parser
|
|||
from spacy.pipeline import DependencyParser
|
||||
from spacy.syntax.arc_eager import ArcEager
|
||||
from spacy.tokens import Doc
|
||||
from spacy.syntax._beam_utils import ParserBeam
|
||||
from spacy.syntax.stateclass import StateClass
|
||||
from spacy.gold import GoldParse
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
|
|
@@ -7,7 +7,6 @@ from spacy.lang.en import English
|
|||
from spacy.language import Language
|
||||
from spacy.pipeline import TextCategorizer
|
||||
from spacy.tokens import Doc
|
||||
from spacy.gold import GoldParse
|
||||
from spacy.util import fix_random_seed
|
||||
|
||||
from ..util import make_tempdir
|
||||
|
|
|
@@ -3,7 +3,7 @@ import gc
|
|||
import numpy
|
||||
import copy
|
||||
|
||||
from spacy.gold import Example, TokenAnnotation
|
||||
from spacy.gold import Example
|
||||
from spacy.lang.en import English
|
||||
from spacy.lang.en.stop_words import STOP_WORDS
|
||||
from spacy.lang.lex_attrs import is_stop
|
||||
|
@@ -268,20 +268,21 @@ def test_issue1963(en_tokenizer):
|
|||
assert doc.tensor.shape == (3, 128)
|
||||
|
||||
|
||||
# TODO: fix
|
||||
@pytest.mark.parametrize("label", ["U-JOB-NAME"])
|
||||
def test_issue1967(label):
|
||||
config = {"learn_tokens": False, "min_action_freq": 30, "beam_width": 1, "beam_update_prob": 1.0}
|
||||
ner = EntityRecognizer(Vocab(), default_ner(), **config)
|
||||
example = Example(
|
||||
doc=Doc(ner.vocab, words=["word"]),
|
||||
token_annotation=TokenAnnotation(
|
||||
ids=[0],
|
||||
words=["word"],
|
||||
tags=["tag"],
|
||||
heads=[0],
|
||||
deps=["dep"],
|
||||
entities=[label]
|
||||
)
|
||||
example = Example.from_dict(
|
||||
Doc(ner.vocab, words=["word"]),
|
||||
{
|
||||
"ids": [0],
|
||||
"words": ["word"],
|
||||
"tags": ["tag"],
|
||||
"heads": [0],
|
||||
"deps": ["dep"],
|
||||
"entities": [label]
|
||||
}
|
||||
)
|
||||
ner.moves.get_actions(gold_parses=[example])
|
||||
|
||||
|
|
|
@@ -1,5 +1,4 @@
|
|||
import pytest
|
||||
from spacy.gold import GoldParse
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
|
Loading…
Reference in New Issue
Block a user