yujuwon · April 19, 2018 08:29
diff --git a/other_vector.py b/other_vector.py
 #!/usr/bin/env python
 # coding: utf8
 """Load vectors for a language trained using fastText
 https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md
 Compatible with: spaCy v2.0.0+
 """
 from __future__ import unicode_literals
 import plac
 import numpy

 import spacy
 from spacy.language import Language


 @plac.annotations(
    vectors_loc=("Path to .vec file", "positional", None, str),
    lang=("Optional language ID. If not set, blank Language() will be used.",
          "positional", None, str))
 def main(vectors_loc, lang=None):
    """Load word vectors from a text vectors file into a spaCy vocab.

    The first line of the file is a header of two whitespace-separated
    fields; the second field is used as the vector width. Every following
    non-blank line is a word followed by that many space-separated floats.
    After loading, a fixed two-word text is run through the pipeline and
    the similarity of its first two tokens is printed.

    vectors_loc: path to the vectors file (opened in binary mode and
        decoded as UTF-8 line by line).
    lang: optional language ID handed to spacy.blank(); when None a bare
        Language() is used instead.

    Raises ValueError if the header does not have exactly two fields or a
    vector component cannot be parsed as a float.
    """
    if lang is None:
        nlp = Language()
    else:
        # create empty language class - this is required if you're planning to
        # save the model to disk and load it back later (models always need a
        # "lang" setting). Use 'xx' for blank multi-language class.
        nlp = spacy.blank(lang)
    with open(vectors_loc, 'rb') as file_:
        header = file_.readline()
        # Only the second header field (the width) is needed; the unpacking
        # still enforces that the header has exactly two fields.
        _nr_row, nr_dim = header.split()
        width = int(nr_dim)  # parsed once instead of once per row
        nlp.vocab.reset_vectors(width=width)
        for line in file_:
            line = line.rstrip().decode('utf8')
            if not line:
                # A blank line would otherwise register the empty string
                # as a word with a zero-length vector.
                continue
            # Split from the right so that a word which itself contains
            # spaces stays intact in pieces[0].
            pieces = line.rsplit(' ', width)
            word = pieces[0]
            vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
            nlp.vocab.set_vector(word, vector)  # add the vectors to the vocab
    # test the vectors and similarity
    text = 'class colspan'
    doc = nlp(text)
    print(text, doc[0].similarity(doc[1]))


 # Script entry point: only runs when the file is executed directly.
 # plac.call(main) hands control to plac, which invokes main with the
 # arguments taken from the command line.
 if __name__ == '__main__':
    plac.call(main)
	#!/usr/bin/env python
	# coding: utf8
	"""Load vectors for a language trained using fastText
	https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md
	Compatible with: spaCy v2.0.0+
	"""
	from __future__ import unicode_literals
	import plac
	import numpy

	import spacy
	from spacy.language import Language


	@plac.annotations(
	    vectors_loc=("Path to .vec file", "positional", None, str),
	    lang=("Optional language ID. If not set, blank Language() will be used.",
	          "positional", None, str))
	def main(vectors_loc, lang=None):
	    """Load word vectors from a text vectors file into a spaCy vocab.

	    The first line of the file is a header of two whitespace-separated
	    fields; the second field is used as the vector width. Every following
	    non-blank line is a word followed by that many space-separated floats.
	    After loading, a fixed two-word text is run through the pipeline and
	    the similarity of its first two tokens is printed.

	    vectors_loc: path to the vectors file (opened in binary mode and
	        decoded as UTF-8 line by line).
	    lang: optional language ID handed to spacy.blank(); when None a bare
	        Language() is used instead.

	    Raises ValueError if the header does not have exactly two fields or a
	    vector component cannot be parsed as a float.
	    """
	    if lang is None:
	        nlp = Language()
	    else:
	        # create empty language class - this is required if you're planning to
	        # save the model to disk and load it back later (models always need a
	        # "lang" setting). Use 'xx' for blank multi-language class.
	        nlp = spacy.blank(lang)
	    with open(vectors_loc, 'rb') as file_:
	        header = file_.readline()
	        # Only the second header field (the width) is needed; the unpacking
	        # still enforces that the header has exactly two fields.
	        _nr_row, nr_dim = header.split()
	        width = int(nr_dim)  # parsed once instead of once per row
	        nlp.vocab.reset_vectors(width=width)
	        for line in file_:
	            line = line.rstrip().decode('utf8')
	            if not line:
	                # A blank line would otherwise register the empty string
	                # as a word with a zero-length vector.
	                continue
	            # Split from the right so that a word which itself contains
	            # spaces stays intact in pieces[0].
	            pieces = line.rsplit(' ', width)
	            word = pieces[0]
	            vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
	            nlp.vocab.set_vector(word, vector)  # add the vectors to the vocab
	    # test the vectors and similarity
	    text = 'class colspan'
	    doc = nlp(text)
	    print(text, doc[0].similarity(doc[1]))


	# Script entry point: only runs when the file is executed directly.
	# plac.call(main) hands control to plac, which invokes main with the
	# arguments taken from the command line.
	if __name__ == '__main__':
	    plac.call(main)
No results found