mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 09:56:28 +03:00
Make sure nr_dim is an int
In some languages (e.g. Dutch), the nr_dim is extracted as a byte string, causing an error down the line.
This commit is contained in:
parent
a3d4dd1a5d
commit
c57e05bec1
|
@ -30,7 +30,7 @@ def main(vectors_loc, lang=None):
|
||||||
nlp.vocab.reset_vectors(width=int(nr_dim))
|
nlp.vocab.reset_vectors(width=int(nr_dim))
|
||||||
for line in file_:
|
for line in file_:
|
||||||
line = line.rstrip().decode('utf8')
|
line = line.rstrip().decode('utf8')
|
||||||
pieces = line.rsplit(' ', nr_dim)
|
pieces = line.rsplit(' ', int(nr_dim))
|
||||||
word = pieces[0]
|
word = pieces[0]
|
||||||
vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
|
vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
|
||||||
nlp.vocab.set_vector(word, vector) # add the vectors to the vocab
|
nlp.vocab.set_vector(word, vector) # add the vectors to the vocab
|
||||||
|
|
Loading…
Reference in New Issue
Block a user