mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-02 13:26:32 +03:00
31 lines
823 B
Python
31 lines
823 B
Python
|
'''Load FastText word vectors trained for a language into a spaCy vocab.

https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md
'''
|
||
|
from __future__ import unicode_literals
|
||
|
import plac
|
||
|
import numpy
|
||
|
|
||
|
import spacy.language
|
||
|
|
||
|
|
||
|
def main(vectors_loc):
    '''Load FastText vectors from a text file and print a sample similarity.

    vectors_loc: path to the vectors file. The first line is a header of
        two whitespace-separated fields, "<rows> <dims>". Each following
        line holds a word followed by <dims> space-separated floats.

    Every word/vector pair is stored on a fresh Language object's vocab,
    then the similarity of the tokens 'class' and 'colspan' is printed.
    '''
    nlp = spacy.language.Language()

    with open(vectors_loc, 'rb') as file_:
        header = file_.readline()
        # Only the dimensionality is needed; the row count is ignored.
        _, nr_dim = header.split()
        nr_dim = int(nr_dim)
        nlp.vocab.clear_vectors(nr_dim)
        for line in file_:
            # Strip the newline (and any trailing space) while still bytes,
            # so only ASCII whitespace is removed before decoding.
            line = line.rstrip().decode('utf8')
            if not line:
                # Tolerate blank lines instead of failing on pieces[0].
                continue
            # Split from the right on a plain space, at most nr_dim times:
            # the last nr_dim fields are the vector, and whatever remains
            # is the word, even if it contains Unicode whitespace that a
            # bare split() would break apart.
            pieces = line.rsplit(' ', nr_dim)
            word = pieces[0]
            vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
            nlp.vocab.set_vector(word, vector)
    doc = nlp(u'class colspan')
    print(doc[0].similarity(doc[1]))
|
||
|
|
||
|
|
||
|
# When executed as a script (not imported), hand main to plac to run.
if __name__ == '__main__':
    plac.call(main)
|