mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
* Use an AttributeCodec based on ORTH for words. There is still no out-of-vocabulary (OOV) handling mechanism.
This commit is contained in:
parent
82d84b0f2b
commit
5b4c78bbb2
|
@ -8,7 +8,7 @@ from libcpp.pair cimport pair
|
|||
from cymem.cymem cimport Address, Pool
|
||||
from preshed.maps cimport PreshMap
|
||||
|
||||
from ..attrs cimport ID, SPACY, TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
|
||||
from ..attrs cimport ORTH, SPACY, TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
|
||||
from ..tokens.doc cimport Doc
|
||||
from ..vocab cimport Vocab
|
||||
from ..typedefs cimport attr_t
|
||||
|
@ -34,17 +34,6 @@ cimport cython
|
|||
# Entity tag
|
||||
|
||||
|
||||
def make_vocab_codec(Vocab vocab):
|
||||
cdef int length = len(vocab)
|
||||
cdef Address mem = Address(length, sizeof(float))
|
||||
probs = <float*>mem.ptr
|
||||
cdef int i
|
||||
for i in range(length):
|
||||
probs[i] = <float>c_exp(vocab.lexemes[i].prob)
|
||||
cdef float[:] cv_probs = <float[:len(vocab)]>probs
|
||||
return HuffmanCodec(cv_probs)
|
||||
|
||||
|
||||
cdef class _BinaryCodec:
|
||||
def encode(self, attr_t[:] msg, BitArray bits):
|
||||
cdef int i
|
||||
|
@ -112,9 +101,7 @@ cdef class Packer:
|
|||
attrs = []
|
||||
|
||||
for attr, freqs in list_of_attr_freqs:
|
||||
if attr == ID:
|
||||
codecs.append(make_vocab_codec(vocab))
|
||||
elif attr == SPACY:
|
||||
if attr == SPACY:
|
||||
codecs.append(_BinaryCodec())
|
||||
else:
|
||||
codecs.append(_AttributeCodec(freqs))
|
||||
|
|
Loading…
Reference in New Issue
Block a user