process_video.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
# coding: utf-8
  2. import sys
  3. import pandas as pd
  4. import numpy as np
  5. import faiss
  6. def gen_i2i(index_item, embeddings,i2i):
  7. fw=open(i2i,"w")
  8. #print(i2i)
  9. embed_matrix=np.array(embeddings).astype('float32')
  10. #print(embed_matrix)
  11. index=faiss.IndexFlatL2(100)
  12. index.add(embed_matrix)
  13. #the candicate matrix is embed_matrix,but the search matrix is the same.
  14. #if the search vector is in the candicate matrix, the return idx>> the first is the search vector itself
  15. #if the search vector is not in the candicate matrix, the return idx>>the first is the index of the candicate
  16. distence_matrix,recall_list=index.search(embed_matrix, 20)
  17. for idx,rec_arr in enumerate(recall_list):
  18. #print("idx:", idx)
  19. orgin_item=str(index_item[idx])
  20. recall_str=""
  21. #rec_arr=[0 6 3 8 7 1]
  22. for re_id in rec_arr[1:]:
  23. recall_idstr=str(index_item[re_id])
  24. #print(recall_idstr)
  25. recall_str=recall_str+","+recall_idstr
  26. fw.write(orgin_item+"\t"+recall_str[1:]+"\n")
  27. if __name__ == '__main__':
  28. f = open(sys.argv[1])
  29. index = 0
  30. index_dict = {}
  31. index_arr = []
  32. while True:
  33. line = f.readline()
  34. if not line:
  35. break
  36. line = line.strip()
  37. #print(line)
  38. items = line.split(" ")
  39. #print(int(items[0]))
  40. try:
  41. vid = int(items[0])
  42. print(line)
  43. #print(str(vid)+"\t"+items[1:])
  44. #print(index_arr)
  45. except:
  46. #print(int(items[0]))
  47. continue
  48. f.close()
  49. #print(len(index_arr))
  50. #gen_i2i(index_dict, index_arr, "i2i_result")