diff --git a/examples/training/rehearsal.py b/examples/training/rehearsal.py index 98a96643b..92002b5e5 100644 --- a/examples/training/rehearsal.py +++ b/examples/training/rehearsal.py @@ -4,7 +4,6 @@ import random import warnings import srsly import spacy -from spacy.gold import GoldParse from spacy.util import minibatch, compounding diff --git a/examples/training/train_textcat.py b/examples/training/train_textcat.py index c5e679467..256aaa293 100644 --- a/examples/training/train_textcat.py +++ b/examples/training/train_textcat.py @@ -19,7 +19,7 @@ from ml_datasets import loaders import spacy from spacy import util from spacy.util import minibatch, compounding -from spacy.gold import Example, GoldParse +from spacy.gold import Example @plac.annotations( diff --git a/spacy/cli/converters/conllu2json.py b/spacy/cli/converters/conllu2json.py index a7d59b9ba..d9c1a2b3f 100644 --- a/spacy/cli/converters/conllu2json.py +++ b/spacy/cli/converters/conllu2json.py @@ -12,7 +12,6 @@ def conllu2json( input_data, n_sents=10, append_morphology=False, - lang=None, ner_map=None, merge_subtokens=False, no_print=False, @@ -41,10 +40,10 @@ def conllu2json( ) has_ner_tags = has_ner(input_data, MISC_NER_PATTERN) for i, example in enumerate(conll_data): - raw += example.predicted.text + raw += example.text sentences.append( generate_sentence( - example, + example.to_dict(), has_ner_tags, MISC_NER_PATTERN, ner_map=ner_map, @@ -145,21 +144,21 @@ def get_entities(lines, tag_pattern, ner_map=None): return iob_to_biluo(iob) -def generate_sentence(token_annotation, has_ner_tags, tag_pattern, ner_map=None): +def generate_sentence(example_dict, has_ner_tags, tag_pattern, ner_map=None): sentence = {} tokens = [] - for i, id_ in enumerate(token_annotation.ids): + for i, id_ in enumerate(example_dict["token_annotation"]["ids"]): token = {} token["id"] = id_ - token["orth"] = token_annotation.get_word(i) - token["tag"] = token_annotation.get_tag(i) - token["pos"] = token_annotation.get_pos(i) - token["lemma"] = token_annotation.get_lemma(i) - token["morph"] = token_annotation.get_morph(i) - token["head"] = token_annotation.get_head(i) - id_ - token["dep"] = token_annotation.get_dep(i) + token["orth"] = example_dict["token_annotation"]["words"][i] + token["tag"] = example_dict["token_annotation"]["tags"][i] + token["pos"] = example_dict["token_annotation"]["pos"][i] + token["lemma"] = example_dict["token_annotation"]["lemmas"][i] + token["morph"] = example_dict["token_annotation"]["morphs"][i] + token["head"] = example_dict["token_annotation"]["heads"][i] - i + token["dep"] = example_dict["token_annotation"]["deps"][i] if has_ner_tags: - token["ner"] = token_annotation.get_entity(i) + token["ner"] = example_dict["doc_annotation"]["entities"][i] tokens.append(token) sentence["tokens"] = tokens return sentence @@ -251,6 +250,7 @@ def example_from_conllu_sentence( for i in range(len(doc)): doc[i].tag_ = tags[i] doc[i].pos_ = poses[i] + doc[i].morph_ = morphs[i] doc[i].dep_ = deps[i] doc[i].lemma_ = lemmas[i] doc[i].head = doc[heads[i]] @@ -267,14 +267,26 @@ def example_from_conllu_sentence( doc = merge_conllu_subtokens(lines, doc) # create Example from custom Doc annotation - words, spaces = [], [] + words, spaces, tags, morphs, lemmas = [], [], [], [], [] for i, t in enumerate(doc): words.append(t._.merged_orth) + lemmas.append(t._.merged_lemma) spaces.append(t._.merged_spaceafter) + morphs.append(t._.merged_morph) if append_morphology and t._.merged_morph: - t.tag_ = t.tag_ + "__" + t._.merged_morph + tags.append(t.tag_ + "__" + t._.merged_morph) + else: + tags.append(t.tag_) - return Example(predicted=Doc(vocab, words=words, spaces=spaces), reference=doc) + doc_x = Doc(vocab, words=words, spaces=spaces) + ref_dict = Example(doc_x, reference=doc).to_dict() + ref_dict["words"] = words + ref_dict["lemmas"] = lemmas + ref_dict["spaces"] = spaces + ref_dict["tags"] = tags + ref_dict["morphs"] = morphs + example = Example.from_dict(doc_x, ref_dict) + return example def merge_conllu_subtokens(lines, doc): diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx index 1538be923..4c84c066b 100644 --- a/spacy/gold/example.pyx +++ b/spacy/gold/example.pyx @@ -167,8 +167,15 @@ cdef class Example: ) return output - def text(self): - return self.x.text + property text: + def __get__(self): + return self.x.text + + def __str__(self): + return str(self.to_dict()) + + def __repr__(self): + return str(self.to_dict()) def _annot2array(vocab, tok_annot, doc_annot):