{"id":5463,"date":"2017-10-07T01:37:09","date_gmt":"2017-10-07T01:37:09","guid":{"rendered":"http:\/\/www.garysieling.com\/blog\/?p=5463"},"modified":"2017-10-07T01:37:09","modified_gmt":"2017-10-07T01:37:09","slug":"python-annoy-glove-example","status":"publish","type":"post","link":"https:\/\/www.garysieling.com\/blog\/python-annoy-glove-example\/","title":{"rendered":"Python Annoy + GloVe example"},"content":{"rendered":"<p>Dependencies:<\/p>\n<pre>\npip3 install annoy\n<\/pre>\n<p>Download from:<br \/>\n<a href=\"https:\/\/nlp.stanford.edu\/projects\/glove\/\">https:\/\/nlp.stanford.edu\/projects\/glove\/<\/a><\/p>\n<p>Code:<\/p>\n<pre lang=\"python\">\nfrom annoy import AnnoyIndex\n\nfile = \"\/Users\/gary\/Downloads\/glove.6B\/glove.6B.50d.txt\"\ncontent = []\nwith open(file) as f:\n    content = f.readlines()\n\ncontent = [x.split(\" \") for x in content]\n\nt = AnnoyIndex(50) \nidx = 0\nterms = [i[0] for i in content]\nfor i in content:\n  vec = [float(a) for a in i[1:]]\n  print(len(vec))\n  t.add_item(idx, vec)\n\n  idx = idx + 1\n\nt.build(10) # 10 trees\nt.save('test.ann')\n<\/pre>\n<p>Lookup:<\/p>\n<pre lang=\"python\">\nu = AnnoyIndex(50)\nu.load('test.ann')\nnear = u.get_nns_by_item(0, 10) # nearest 10 terms\nnearWords = [terms[i] for i in near]\nprint(nearWords)\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>Using k-nearest neighbors on glove dataset<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"om_disable_all_campaigns":false,"_monsterinsights_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"footnotes":""},"categories":[4],"tags":[331,447],"aioseo_notices":[],"amp_enabled":true,"_links":{"self":[{"href":"https:\/\/www.garysieling.com\/blog\/wp-json\/wp\/v2\/posts\/5463"}],"collection":[{"href":"https:\/\/www.garysieling.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.garysieling.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.garysieling.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.garysieling.com\/blog\/wp-json\/wp\/v2\/comments?post=5463"}],"version-history":[{"count":0,"href":"https:\/\/www.garysieling.com\/blog\/wp-json\/wp\/v2\/posts\/5463\/revisions"}],"wp:attachment":[{"href":"https:\/\/www.garysieling.com\/blog\/wp-json\/wp\/v2\/media?parent=5463"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.garysieling.com\/blog\/wp-json\/wp\/v2\/categories?post=5463"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.garysieling.com\/blog\/wp-json\/wp\/v2\/tags?post=5463"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}