# -*- coding: utf-8 -*-
import sys

import numpy as np
import pandas as pd

import faiss


def gen_i2i(index_item, embeddings, i2i, topk=20):
    """Write an item-to-item nearest-neighbour table to the file ``i2i``.

    Every row of ``embeddings`` is added to a FAISS ``IndexFlatL2`` index and
    then used as a query against that same index. For each row, one line is
    written in the form::

        <item>\\t<neighbour_1>,<neighbour_2>,...

    Args:
        index_item: Lookup from row position to item identifier. It is
            indexed with the integer row number and the result is passed to
            ``str()``.
        embeddings: Row vectors, converted with ``np.array(...)`` and cast to
            ``float32``. Expected to form a 2-D matrix; the index dimension
            is taken from its second axis (the original code hard-coded 100).
        i2i: Path of the output file; it is opened in ``"w"`` mode.
        topk: Number of results requested from FAISS per query. Because the
            query item itself is excluded, at most ``topk - 1`` neighbours
            are written per line.

    Raises:
        ValueError: If ``embeddings`` is non-empty but not 2-D.
    """
    embed_matrix = np.array(embeddings).astype("float32")

    if embed_matrix.size == 0:
        # Nothing to index: still produce the (empty) output file.
        with open(i2i, "w"):
            pass
        return

    if embed_matrix.ndim != 2:
        raise ValueError(
            "embeddings must form a 2-D matrix, got %d dimension(s)"
            % embed_matrix.ndim
        )

    index = faiss.IndexFlatL2(embed_matrix.shape[1])
    index.add(embed_matrix)

    # The candidate matrix and the query matrix are the same, so every query
    # normally finds itself among its own results.
    _, recall_list = index.search(embed_matrix, topk)

    with open(i2i, "w") as fw:
        for idx, rec_arr in enumerate(recall_list):
            # Drop the query item by id rather than by position: with
            # duplicate vectors the self-match is not guaranteed to be first.
            # Negative labels are FAISS padding for "no result" and must not
            # be used as lookups.
            neighbour_ids = [
                int(re_id) for re_id in rec_arr if re_id >= 0 and re_id != idx
            ][: max(topk - 1, 0)]
            recall_str = ",".join(str(index_item[re_id]) for re_id in neighbour_ids)
            fw.write(str(index_item[idx]) + "\t" + recall_str + "\n")


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit("usage: %s <embedding_file>" % sys.argv[0])

    # NOTE(review): these containers are never filled by this script, and the
    # gen_i2i call below is disabled, exactly as in the original. They must be
    # populated from the parsed lines before the call is re-enabled.
    index = 0
    index_dict = {}
    index_arr = []

    with open(sys.argv[1]) as f:
        for line in f:
            line = line.strip()
            items = line.split(" ")
            try:
                vid = int(items[0])
            except ValueError:
                # Skip lines whose first field is not an integer id.
                continue
            print(line)

    # gen_i2i(index_dict, index_arr, "i2i_result")