Make fasttext reader accommodate whitespace

This commit is contained in:
Matthew Honnibal 2017-11-12 12:07:13 +01:00 committed by GitHub
parent 94d8b711a3
commit f0e28e8ae5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -30,7 +30,7 @@ def main(vectors_loc, lang=None):
nlp.vocab.reset_vectors(width=int(nr_dim))
for line in file_:
line = line.decode('utf8')
- pieces = line.split()
+ pieces = line.rsplit(' ', nr_dim)
word = pieces[0]
vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
nlp.vocab.set_vector(word, vector) # add the vectors to the vocab