mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
e48a09df4e
* OrigAnnot class instead of gold.orig_annot list of zipped tuples * from_orig to replace from_annot_tuples * rename to RawAnnot * some unit tests for GoldParse creation and internal format * removing orig_annot and switching to lists instead of tuple * rewriting tuples to use RawAnnot (+ debug statements, WIP) * fix pop() changing the data * small fixes * pop-append fixes * return RawAnnot for existing GoldParse to have uniform interface * clean up imports * fix merge_sents * add unit test for 4402 with new structure (not working yet) * introduce DocAnnot * typo fixes * add unit test for merge_sents * rename from_orig to from_raw * fixing unit tests * fix nn parser * read_annots to produce text, doc_annot pairs * _make_golds fix * rename golds_to_gold_annots * small fixes * fix encoding * have golds_to_gold_annots use DocAnnot * missed a spot * merge_sents as function in DocAnnot * allow specifying only part of the token-level annotations * refactor with Example class + underlying dicts * pipeline components to work with Example objects (wip) * input checking * fix yielding * fix calls to update * small fixes * fix scorer unit test with new format * fix kwargs order * fixes for ud and conllu scripts * fix reading data for conllu script * add in proper errors (not fixed numbering yet to avoid merge conflicts) * fixing few more small bugs * fix EL script
66 lines
1.3 KiB
Cython
66 lines
1.3 KiB
Cython
from cymem.cymem cimport Pool
|
|
|
|
from spacy.tokens import Doc
|
|
from .typedefs cimport attr_t
|
|
from .syntax.transition_system cimport Transition
|
|
|
|
|
|
# C-level struct holding the gold-standard annotations as raw pointers.
# Field order is part of the C layout; do not reorder.
# NOTE(review): the arrays are presumably indexed per token and allocated
# by the owning GoldParse (via its Pool) -- confirm in the matching .pyx.
cdef struct GoldParseC:
    int* tags
    int* heads
    int* has_dep
    int* sent_start
    attr_t* labels
    int** brackets
    Transition* ner
|
|
|
|
|
|
# Attribute declarations for the GoldParse extension type.
#
# Visibility follows Cython's rules for cdef attributes:
#   * plain `cdef`      -- accessible from Cython/C code only
#   * `cdef readonly`   -- readable (not writable) from Python
#   * `cdef public`     -- readable and writable from Python
cdef class GoldParse:
    # Memory pool; presumably owns the arrays referenced by `c` -- TODO confirm.
    cdef Pool mem

    # C-level view of the annotations, plus the TokenAnnotation object
    # exposed read-only to Python as `orig`.
    cdef GoldParseC c
    cdef readonly TokenAnnotation orig

    cdef int length
    cdef public int loss
    cdef public list words
    cdef public list tags
    cdef public list morphology
    cdef public list heads
    cdef public list labels
    cdef public dict orths
    cdef public list ner
    cdef public dict brackets
    cdef public dict cats
    cdef public dict links

    # NOTE(review): by name these look like alignment tables between
    # candidate and gold tokenization -- verify against the implementation.
    cdef readonly list cand_to_gold
    cdef readonly list gold_to_cand
|
|
|
|
|
|
# Attribute declarations for the TokenAnnotation extension type.
# Every attribute is a `public list`, i.e. readable and writable from Python.
# NOTE(review): the lists are presumably parallel (one entry per token) --
# this file does not establish that; confirm in the .pyx.
cdef class TokenAnnotation:
    cdef public list ids
    cdef public list words
    cdef public list tags
    cdef public list heads
    cdef public list deps
    cdef public list entities
    cdef public list morphology
    cdef public list brackets
|
|
|
|
|
|
# Attribute declarations for the DocAnnotation extension type.
# Both attributes are untyped (`object`) and public, so any Python value
# can be assigned to them from Python code.
cdef class DocAnnotation:
    cdef public object cats
    cdef public object links
|
|
|
|
|
|
# Attribute declarations for the Example extension type.
# All attributes are public (readable and writable from Python).
cdef class Example:
    cdef public object doc
    # Declared as a plain list; element type is not fixed here
    # (presumably TokenAnnotation objects -- TODO confirm).
    cdef public list token_annotations
    cdef public DocAnnotation doc_annotation
    # Declared as `object`, not bint, so any Python value is accepted.
    cdef public object make_projective
    cdef public object ignore_misaligned
    cdef public object goldparse
|
|
|
|
|