Make sure nr_dim is an int

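For some languages' vector files (e.g. Dutch), `nr_dim` is extracted as a byte string rather than an int, which causes an error further down when it is passed to `line.rsplit(' ', nr_dim)` as the maximum number of splits.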
This commit is contained in:
ines 2017-11-17 14:56:27 +01:00
parent a3d4dd1a5d
commit c57e05bec1

View File

@@ -30,7 +30,7 @@ def main(vectors_loc, lang=None):
nlp.vocab.reset_vectors(width=int(nr_dim)) nlp.vocab.reset_vectors(width=int(nr_dim))
for line in file_: for line in file_:
line = line.rstrip().decode('utf8') line = line.rstrip().decode('utf8')
pieces = line.rsplit(' ', nr_dim) pieces = line.rsplit(' ', int(nr_dim))
word = pieces[0] word = pieces[0]
vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f') vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
nlp.vocab.set_vector(word, vector) # add the vectors to the vocab nlp.vocab.set_vector(word, vector) # add the vectors to the vocab