spaCy/examples/vectors_fast_text.py

#!/usr/bin/env python
# coding: utf8
"""Load vectors for a language trained using fastText
https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md
"""
from __future__ import unicode_literals
import plac
import numpy

from spacy.language import Language


@plac.annotations(
    vectors_loc=("Path to vectors", "positional", None, str))
def main(vectors_loc):
    """Load fastText vectors into a blank pipeline and print a similarity.

    The file is read as text-format vectors: a header line with two
    whitespace-separated fields (row count and vector width), followed by one
    entry per line made of the word and its values, separated by spaces.

    vectors_loc (str): Path to the vectors file.
    """
    nlp = Language()  # start off with a blank Language class
    with open(vectors_loc, 'rb') as file_:
        header = file_.readline()
        nr_row, nr_dim = header.split()
        nr_dim = int(nr_dim)  # convert once; reused for every line below
        nlp.vocab.clear_vectors(nr_dim)
        for line in file_:
            # Drop the line ending (and any trailing separator) before
            # decoding, and skip blank lines instead of failing on them.
            line = line.rstrip().decode('utf8')
            if not line:
                continue
            # Split from the right on the plain space separator, exactly
            # nr_dim times. A bare split() would also break on Unicode
            # whitespace inside the word itself and corrupt the entry.
            pieces = line.rsplit(' ', nr_dim)
            word = pieces[0]
            vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
            nlp.vocab.set_vector(word, vector)  # add the vectors to the vocab
    # test the vectors and similarity
    text = 'class colspan'
    doc = nlp(text)
    print(text, doc[0].similarity(doc[1]))


if __name__ == '__main__':
    # Script entry point: hand main to plac, which was given the
    # command-line argument description via @plac.annotations above.
    plac.call(main)
Clean up examples 2017-10-26 18:32:59 +03:00			`#!/usr/bin/env python`
			`# coding: utf8`
Update fastText example and add to examples in docs 2017-10-26 19:47:02 +03:00			`"""Load vectors for a language trained using fastText`
Add example loadig Fast Text vectors 2017-10-02 00:40:02 +03:00			`https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md`
Clean up examples 2017-10-26 18:32:59 +03:00			`"""`
Add example loadig Fast Text vectors 2017-10-02 00:40:02 +03:00			`from __future__ import unicode_literals`
			`import plac`
			`import numpy`

Fix syntax error 2017-11-01 02:43:28 +03:00			`from spacy.language import Language`
Add example loadig Fast Text vectors 2017-10-02 00:40:02 +03:00

Update fastText example and add to examples in docs 2017-10-26 19:47:02 +03:00			`@plac.annotations(`
			`vectors_loc=("Path to vectors", "positional", None, str))`
Add example loadig Fast Text vectors 2017-10-02 00:40:02 +03:00			`def main(vectors_loc):`
Fix formatting 2017-11-01 02:43:22 +03:00			`nlp = Language() # start off with a blank Language class`
Add example loadig Fast Text vectors 2017-10-02 00:40:02 +03:00			`with open(vectors_loc, 'rb') as file_:`
			`header = file_.readline()`
			`nr_row, nr_dim = header.split()`
			`nlp.vocab.clear_vectors(int(nr_dim))`
			`for line in file_:`
			`line = line.decode('utf8')`
Clean up examples 2017-10-26 18:32:59 +03:00			`pieces = line.split()`
Add example loadig Fast Text vectors 2017-10-02 00:40:02 +03:00			`word = pieces[0]`
			`vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')`
Fix formatting 2017-11-01 02:43:22 +03:00			`nlp.vocab.set_vector(word, vector) # add the vectors to the vocab`
			`# test the vectors and similarity`
			`text = 'class colspan'`
			`doc = nlp(text)`
			`print(text, doc[0].similarity(doc[1]))`
Add example loadig Fast Text vectors 2017-10-02 00:40:02 +03:00

			`if __name__ == '__main__':`
			`plac.call(main)`