From 5b4c78bbb299622f2d8fa4cba7f5cca4483539aa Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 18 Jul 2015 22:43:18 +0200 Subject: [PATCH] * Use an AttributeCodec based on orth for words. Still no oov handling mechanism. --- spacy/serialize/packer.pyx | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/spacy/serialize/packer.pyx b/spacy/serialize/packer.pyx index c7a12518d..09f6de57a 100644 --- a/spacy/serialize/packer.pyx +++ b/spacy/serialize/packer.pyx @@ -8,7 +8,7 @@ from libcpp.pair cimport pair from cymem.cymem cimport Address, Pool from preshed.maps cimport PreshMap -from ..attrs cimport ID, SPACY, TAG, HEAD, DEP, ENT_IOB, ENT_TYPE +from ..attrs cimport ORTH, SPACY, TAG, HEAD, DEP, ENT_IOB, ENT_TYPE from ..tokens.doc cimport Doc from ..vocab cimport Vocab from ..typedefs cimport attr_t @@ -34,17 +34,6 @@ cimport cython # Entity tag -def make_vocab_codec(Vocab vocab): - cdef int length = len(vocab) - cdef Address mem = Address(length, sizeof(float)) - probs = mem.ptr - cdef int i - for i in range(length): - probs[i] = c_exp(vocab.lexemes[i].prob) - cdef float[:] cv_probs = probs - return HuffmanCodec(cv_probs) - - cdef class _BinaryCodec: def encode(self, attr_t[:] msg, BitArray bits): cdef int i @@ -112,9 +101,7 @@ cdef class Packer: attrs = [] for attr, freqs in list_of_attr_freqs: - if attr == ID: - codecs.append(make_vocab_codec(vocab)) - elif attr == SPACY: + if attr == SPACY: codecs.append(_BinaryCodec()) else: codecs.append(_AttributeCodec(freqs))