Mirror of https://github.com/explosion/spaCy.git (synced 2025-02-11 09:00:36 +03:00)

commit 2d9f406188
parent f7ad8e8c83

    fix test_cli
@@ -4,7 +4,6 @@ import random
 import warnings
 import srsly
 import spacy
-from spacy.gold import GoldParse
 from spacy.util import minibatch, compounding
 
 
@@ -19,7 +19,7 @@ from ml_datasets import loaders
 import spacy
 from spacy import util
 from spacy.util import minibatch, compounding
-from spacy.gold import Example, GoldParse
+from spacy.gold import Example
 
 
 @plac.annotations(
@@ -12,7 +12,6 @@ def conllu2json(
     input_data,
     n_sents=10,
     append_morphology=False,
     lang=None,
     ner_map=None,
     merge_subtokens=False,
     no_print=False,
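For orientation, the function whose signature is diffed above converts raw CoNLL-U text into spaCy's JSON training format. A minimal usage sketch follows, assuming the development-branch converter API; the import path and the list-of-dicts return shape are inferred here, not shown in this diff:

# Hypothetical usage sketch for the converter diffed above (assumptions noted in the lead-in).
from spacy.cli.converters import conllu2json  # import path assumed

with open("train.conllu", encoding="utf-8") as f:
    conllu_text = f.read()

# Groups every n_sents sentences into one JSON-format document.
docs = conllu2json(
    conllu_text,
    n_sents=10,
    append_morphology=False,
    merge_subtokens=False,
    no_print=True,
)
print(len(docs))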
@@ -41,10 +40,10 @@ def conllu2json(
     )
     has_ner_tags = has_ner(input_data, MISC_NER_PATTERN)
     for i, example in enumerate(conll_data):
-        raw += example.predicted.text
+        raw += example.text
         sentences.append(
             generate_sentence(
-                example,
+                example.to_dict(),
                 has_ner_tags,
                 MISC_NER_PATTERN,
                 ner_map=ner_map,
@@ -145,21 +144,21 @@ def get_entities(lines, tag_pattern, ner_map=None):
     return iob_to_biluo(iob)
 
 
-def generate_sentence(token_annotation, has_ner_tags, tag_pattern, ner_map=None):
+def generate_sentence(example_dict, has_ner_tags, tag_pattern, ner_map=None):
     sentence = {}
     tokens = []
-    for i, id_ in enumerate(token_annotation.ids):
+    for i, id_ in enumerate(example_dict["token_annotation"]["ids"]):
         token = {}
         token["id"] = id_
-        token["orth"] = token_annotation.get_word(i)
-        token["tag"] = token_annotation.get_tag(i)
-        token["pos"] = token_annotation.get_pos(i)
-        token["lemma"] = token_annotation.get_lemma(i)
-        token["morph"] = token_annotation.get_morph(i)
-        token["head"] = token_annotation.get_head(i) - id_
-        token["dep"] = token_annotation.get_dep(i)
+        token["orth"] = example_dict["token_annotation"]["words"][i]
+        token["tag"] = example_dict["token_annotation"]["tags"][i]
+        token["pos"] = example_dict["token_annotation"]["pos"][i]
+        token["lemma"] = example_dict["token_annotation"]["lemmas"][i]
+        token["morph"] = example_dict["token_annotation"]["morphs"][i]
+        token["head"] = example_dict["token_annotation"]["heads"][i] - i
+        token["dep"] = example_dict["token_annotation"]["deps"][i]
         if has_ner_tags:
-            token["ner"] = token_annotation.get_entity(i)
+            token["ner"] = example_dict["doc_annotation"]["entities"][i]
         tokens.append(token)
     sentence["tokens"] = tokens
     return sentence
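The rewritten generate_sentence no longer reads from a TokenAnnotation object; it indexes into the plain dict returned by Example.to_dict(), which the loop earlier in the file now passes in as example.to_dict(). Judging only from the keys accessed in this hunk, that dict has roughly the shape sketched below; this is an illustration of the assumed layout, not a documented schema:

# Assumed layout implied by the keys used in the new generate_sentence.
example_dict = {
    "token_annotation": {
        "ids": [1, 2],
        "words": ["Hello", "world"],
        "tags": ["UH", "NN"],
        "pos": ["INTJ", "NOUN"],
        "lemmas": ["hello", "world"],
        "morphs": ["", "Number=Sing"],
        "heads": [1, 1],         # token indices; the converter stores heads[i] - i
        "deps": ["intj", "ROOT"],
    },
    "doc_annotation": {
        "entities": ["O", "O"],  # per-token NER tags, read only when has_ner_tags is True
    },
}

# Same access pattern as the new code, for one token:
i = 0
token = {
    "id": example_dict["token_annotation"]["ids"][i],
    "orth": example_dict["token_annotation"]["words"][i],
    "head": example_dict["token_annotation"]["heads"][i] - i,
}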
@@ -251,6 +250,7 @@ def example_from_conllu_sentence(
     for i in range(len(doc)):
         doc[i].tag_ = tags[i]
         doc[i].pos_ = poses[i]
         doc[i].morph_ = morphs[i]
         doc[i].dep_ = deps[i]
         doc[i].lemma_ = lemmas[i]
         doc[i].head = doc[heads[i]]
@@ -267,14 +267,26 @@ def example_from_conllu_sentence(
         doc = merge_conllu_subtokens(lines, doc)
 
     # create Example from custom Doc annotation
-    words, spaces = [], []
+    words, spaces, tags, morphs, lemmas = [], [], [], [], []
     for i, t in enumerate(doc):
         words.append(t._.merged_orth)
+        lemmas.append(t._.merged_lemma)
         spaces.append(t._.merged_spaceafter)
+        morphs.append(t._.merged_morph)
         if append_morphology and t._.merged_morph:
-            t.tag_ = t.tag_ + "__" + t._.merged_morph
+            tags.append(t.tag_ + "__" + t._.merged_morph)
+        else:
+            tags.append(t.tag_)
 
-    return Example(predicted=Doc(vocab, words=words, spaces=spaces), reference=doc)
+    doc_x = Doc(vocab, words=words, spaces=spaces)
+    ref_dict = Example(doc_x, reference=doc).to_dict()
+    ref_dict["words"] = words
+    ref_dict["lemmas"] = lemmas
+    ref_dict["spaces"] = spaces
+    ref_dict["tags"] = tags
+    ref_dict["morphs"] = morphs
+    example = Example.from_dict(doc_x, ref_dict)
+    return example
 
 
 def merge_conllu_subtokens(lines, doc):
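The new return path builds the Example in two steps: seed a dict from the fully annotated reference Doc via to_dict(), overwrite the token-level lists just collected, then rebuild with Example.from_dict against a bare predicted Doc. A condensed sketch of that pattern, assuming the branch's spacy.gold API (current spaCy releases expose the same class from spacy.training):

# Condensed sketch of the to_dict / from_dict round-trip used above.
from spacy.gold import Example  # development-branch import path (assumption for this sketch)
from spacy.tokens import Doc
from spacy.vocab import Vocab

vocab = Vocab()
# Stand-in for the annotated doc the converter builds from the CoNLL-U lines.
reference = Doc(vocab, words=["Hello", "world"], spaces=[True, False])

words = [t.text for t in reference]
spaces = [bool(t.whitespace_) for t in reference]

predicted = Doc(vocab, words=words, spaces=spaces)            # un-annotated side
ref_dict = Example(predicted, reference=reference).to_dict()  # start from the reference annotation
ref_dict["words"] = words                                     # then overwrite selected fields,
ref_dict["spaces"] = spaces                                   # as the converter does for tags/morphs/lemmas
example = Example.from_dict(predicted, ref_dict)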
@@ -167,9 +167,16 @@ cdef class Example:
         )
         return output
 
-    def text(self):
+    property text:
+        def __get__(self):
+            return self.x.text
+
+    def __str__(self):
+        return str(self.to_dict())
+
+    def __repr__(self):
         return str(self.to_dict())
 
 
 def _annot2array(vocab, tok_annot, doc_annot):
     attrs = []
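With this hunk, text becomes a read-only property on the Cython Example class, returning the text of the predicted side (self.x), while __str__ and __repr__ render the annotation dict. That is what lets the converter above write raw += example.text instead of example.predicted.text. A small behaviour sketch, again assuming the branch's spacy.gold import path:

# Behaviour sketch for the new property and dunder methods (branch API assumed).
from spacy.gold import Example
from spacy.tokens import Doc
from spacy.vocab import Vocab

vocab = Vocab()
predicted = Doc(vocab, words=["Hello", "world"])
reference = Doc(vocab, words=["Hello", "world"])
example = Example(predicted, reference=reference)

assert example.text == predicted.text  # `text` is a property now, no call needed
print(str(example))                    # __str__ and __repr__ both show str(example.to_dict())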