mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 09:56:28 +03:00
609c0ba557
* Tidy up train-from-config a bit
* Fix accidentally quadratic perf in TokenAnnotation.brackets. When we're reading in the gold data, we had a nested loop in which we looped over the brackets for each token, looking for brackets that start on that word. This is accidentally quadratic, because we have one bracket per word (for the POS tags), so we had O(N**2) behaviour here that ended up being pretty slow. To solve this, I'm indexing the brackets by their starting word on the TokenAnnotation object, and adding a property that provides the previous view.
* Fixes
69 lines
1.4 KiB
Cython
69 lines
1.4 KiB
Cython
from cymem.cymem cimport Pool
|
|
|
|
from .typedefs cimport attr_t
|
|
from .syntax.transition_system cimport Transition
|
|
|
|
from .tokens import Doc
|
|
|
|
|
|
# Plain C struct holding the pointer-typed gold-standard fields.
# Member order is part of the struct layout and is kept as declared.
# NOTE(review): array lengths and ownership are not visible in this
# declaration file -- confirm against the implementation module.
cdef struct GoldParseC:
    int *tags
    int *heads
    int *has_dep
    int *sent_start
    attr_t *labels        # attr_t is cimported from .typedefs
    int **brackets        # pointer-to-pointer of ints
    Transition *ner       # Transition is cimported from .syntax.transition_system
|
|
|
|
|
|
# Extension type pairing the C-level GoldParseC struct with Python-visible
# annotation containers. Attribute order matches the original declaration.
cdef class GoldParse:
    # cymem memory pool.
    # NOTE(review): presumably backs the arrays referenced by `c` -- confirm.
    cdef Pool mem

    # C-level view of the annotations (not accessible from Python).
    cdef GoldParseC c
    # Read-only from Python: the TokenAnnotation stored on this object.
    cdef readonly TokenAnnotation orig

    # `length` is C-only; `loss` is readable and writable from Python.
    cdef int length
    cdef public int loss

    # Python-visible, writable annotation containers.
    cdef public list words, tags, pos, morphs, lemmas, sent_starts, heads, labels
    cdef public dict orths
    cdef public list ner
    cdef public dict brackets, cats, links

    # Read-only lists.
    # NOTE(review): names suggest candidate<->gold index mappings -- confirm.
    cdef readonly list cand_to_gold, gold_to_cand
|
|
|
|
|
|
# Extension type holding token-level annotation containers, all readable and
# writable from Python. Attribute order matches the original declaration.
cdef class TokenAnnotation:
    # List-typed annotation fields.
    # NOTE(review): presumably one entry per token -- confirm.
    cdef public list ids, words, tags, pos, morphs
    cdef public list lemmas, heads, deps, entities, sent_starts

    # Brackets indexed by their starting word, so that lookups do not need to
    # scan every bracket for every token.
    cdef public dict brackets_by_start
|
|
|
|
|
|
# Extension type holding two untyped, Python-visible annotation attributes.
cdef class DocAnnotation:
    # Both are declared as generic Python objects, so no container type is
    # enforced at this level.
    cdef public object cats, links
|
|
|
|
|
|
# Extension type bundling a document with its token-level and doc-level
# annotations. All attributes are readable and writable from Python.
cdef class Example:
    # Untyped Python object.
    # NOTE(review): presumably a Doc -- confirm.
    cdef public object doc

    # Typed annotation holders.
    cdef public TokenAnnotation token_annotation
    cdef public DocAnnotation doc_annotation

    # Untyped Python object.
    # NOTE(review): presumably a GoldParse or None -- confirm.
    cdef public object goldparse
|