|
@@ -0,0 +1,54 @@
|
|
|
+#coding utf-8
|
|
|
+import sys
|
|
|
+import pandas as pd
|
|
|
+import numpy as np
|
|
|
+import faiss
|
|
|
+
|
|
|
+
|
|
|
def gen_i2i(index_item, embeddings, i2i, topk=20):
    """Write an item-to-item nearest-neighbour table to a text file.

    Every embedding is used both as a candidate and as a query against an
    exact L2 faiss index. One line is written per item in the form
    ``<item>\\t<neighbour>,<neighbour>,...``.

    Args:
        index_item: Mapping (or sequence) from row position in
            ``embeddings`` to the item identifier written to the file.
        embeddings: Sequence of equal-length numeric vectors, one per item.
        i2i: Path of the output file; it is overwritten.
        topk: Number of hits requested from the index per query. Because
            the query itself is one of the candidates, at most
            ``topk - 1`` neighbours are written per item.

    Raises:
        ValueError: If ``topk`` is smaller than 1 or the embeddings do not
            form a 2-D matrix.
    """
    if topk < 1:
        raise ValueError("topk must be >= 1, got %r" % (topk,))

    embed_matrix = np.array(embeddings).astype('float32')
    if embed_matrix.size and embed_matrix.ndim != 2:
        raise ValueError(
            "embeddings must form a 2-D matrix, got shape %r"
            % (embed_matrix.shape,)
        )

    with open(i2i, "w") as fw:
        if embed_matrix.size == 0:
            # Nothing to index: leave an empty result file behind.
            return

        # Take the dimension from the data instead of hard-coding it.
        index = faiss.IndexFlatL2(embed_matrix.shape[1])
        index.add(embed_matrix)

        # The candidate matrix and the query matrix are the same, so each
        # query normally finds itself among its own hits.
        _, recall_list = index.search(embed_matrix, topk)

        for idx, rec_arr in enumerate(recall_list):
            # Drop the query itself by id (with duplicate vectors it is not
            # guaranteed to be the first hit) and skip the -1 padding faiss
            # returns when fewer than ``topk`` candidates exist.
            neighbours = [
                str(index_item[int(re_id)])
                for re_id in rec_arr
                if re_id >= 0 and re_id != idx
            ]
            fw.write(
                str(index_item[idx])
                + "\t"
                + ",".join(neighbours[:topk - 1])
                + "\n"
            )
|
|
|
+
|
|
|
def load_embeddings(path):
    """Parse an embedding file into an index->id map and a vector list.

    Each non-blank line is expected to look like ``<int id> <vector>``,
    where ``<vector>`` is a Python expression (it may contain spaces).
    Lines that cannot be parsed are skipped and counted.

    Args:
        path: Path of the text file to read.

    Returns:
        A tuple ``(index_dict, index_arr, skipped)`` where ``index_dict``
        maps the row position to the integer id, ``index_arr`` holds the
        evaluated vectors in the same order, and ``skipped`` is the number
        of non-blank lines that were dropped.
    """
    index_dict = {}
    index_arr = []
    skipped = 0
    with open(path) as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            items = line.split(" ")
            try:
                vid = int(items[0])
                # SECURITY NOTE(review): eval() executes arbitrary Python
                # found in the input file. Only run this on trusted data;
                # if the vectors are plain list/tuple literals, consider
                # switching to ast.literal_eval.
                vid_vec = eval(" ".join(items[1:]))
            except Exception:
                # Best effort: drop malformed lines, but keep a count.
                skipped += 1
                continue
            # The row position is the current length of the vector list.
            index_dict[len(index_arr)] = vid
            index_arr.append(vid_vec)
    return index_dict, index_arr, skipped


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit("usage: python %s <embedding_file>" % sys.argv[0])

    index_dict, index_arr, skipped = load_embeddings(sys.argv[1])
    if skipped:
        sys.stderr.write("skipped %d malformed line(s)\n" % skipped)
    print(len(index_arr))
    gen_i2i(index_dict, index_arr, "i2i_result")
|