# -*- coding: utf-8 -*-
"""Load item embeddings from a text file and (optionally) build an i2i table.

Each input line is expected to look like ``<int id> [v1, v2, ...]``.
"""
import ast
import sys
import time

import numpy as np
import pandas as pd

try:
    import faiss
except ImportError:
    # faiss is only needed by gen_i2i(); loading/parsing works without it.
    faiss = None


def gen_i2i(index_item, embeddings, i2i, topk=20):
    """Write the nearest neighbours of every item to the file ``i2i``.

    One line is written per item: the item, a tab, then its neighbours joined
    by commas, ordered as returned by the L2 search.

    Args:
        index_item: mapping (or sequence) from row position in ``embeddings``
            to the item identifier that is written to the file.
        embeddings: sequence of equal-length numeric vectors, one per item.
        i2i: path of the output file; it is overwritten.
        topk: number of hits requested per query. The query item itself is
            normally one of them, so at most ``topk - 1`` neighbours are
            written per item.

    Raises:
        ValueError: if ``topk`` is smaller than 1 or ``embeddings`` is not
            two-dimensional.
        ImportError: if faiss is not installed and there is data to index.
    """
    if topk < 1:
        raise ValueError("topk must be >= 1")

    embed_matrix = np.ascontiguousarray(embeddings, dtype="float32")

    if embed_matrix.size == 0:
        # Nothing to index: leave an empty result file behind.
        with open(i2i, "w"):
            pass
        return

    if embed_matrix.ndim != 2:
        raise ValueError(
            "embeddings must be a 2-D array-like, got shape %s"
            % (embed_matrix.shape,)
        )
    if faiss is None:
        raise ImportError("faiss is required to build the i2i table")

    # Index dimension follows the data instead of a fixed 100.
    index = faiss.IndexFlatL2(embed_matrix.shape[1])
    index.add(embed_matrix)

    # The indexed vectors are also the queries, so every query is present in
    # the index and is normally returned as its own first hit.
    _, recall_list = index.search(embed_matrix, topk)

    with open(i2i, "w") as fw:
        for idx, rec_arr in enumerate(recall_list):
            neighbours = []
            for re_id in rec_arr:
                re_id = int(re_id)
                # faiss pads with -1 when fewer than topk vectors exist;
                # also drop the query item wherever it appears in the hits.
                if re_id < 0 or re_id == idx:
                    continue
                neighbours.append(str(index_item[re_id]))
            fw.write(
                str(index_item[idx])
                + "\t"
                + ",".join(neighbours[:topk - 1])
                + "\n"
            )


def parse_embeddings(lines):
    """Parse ``<int id> [v1, v2, ...]`` lines into ids and vectors.

    Square brackets are stripped and the text after the first space is
    evaluated as a Python literal. Lines with fewer than two space-separated
    fields, a non-integer id, or an unparsable vector are skipped.

    Args:
        lines: iterable of text lines (for example an open file).

    Returns:
        A pair ``(index_dict, index_arr)`` where ``index_arr[i]`` is the
        parsed vector of the i-th accepted line and ``index_dict[i]`` is its
        integer id.
    """
    index_dict = {}
    index_arr = []
    for raw_line in lines:
        items = raw_line.strip().replace("[", "").replace("]", "").split(" ")
        if len(items) < 2:
            continue
        try:
            vid = int(items[0])
            # literal_eval only accepts literals, so file content can never
            # execute code the way eval() would.
            vid_vec = ast.literal_eval(" ".join(items[1:]).strip())
        except (ValueError, SyntaxError, TypeError):
            continue
        index_dict[len(index_arr)] = vid
        index_arr.append(vid_vec)
    return index_dict, index_arr


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit("usage: %s <embedding_file>" % sys.argv[0])

    with open(sys.argv[1]) as f:
        start_time = time.time()
        index_dict, index_arr = parse_embeddings(f)

    print(len(index_arr))
    end_time = time.time()
    print("time:", (end_time-start_time))
    # Neighbour generation is currently disabled:
    # gen_i2i(index_dict, index_arr, "i2i_result")