VectorInsert.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. import BertEmbedding
  2. import MilvusComponent
  3. import ODPSQueryUtil
  4. from datetime import datetime
  5. def convert_to_timestamp(date_string):
  6. # 解析日期时间字符串
  7. dt = datetime.strptime(date_string, '%Y-%m-%d %H:%M:%S')
  8. # 转换为时间戳
  9. timestamp = dt.timestamp()
  10. return timestamp
  11. def insertEmbeddings(start_idx, limit):
  12. # 1. Get data from ODPS
  13. records = ODPSQueryUtil.query_video_title_perfermance(start_idx, limit)
  14. print(f"Got {len(records)} records")
  15. # 取出所有的videoid
  16. videoids = []
  17. title_vectors = []
  18. titles = []
  19. preview_times_arr = []
  20. preview_users_arr = []
  21. view_times_arr = []
  22. view_users_arr = []
  23. play_times_arr = []
  24. play_users_arr = []
  25. share_times_arr = []
  26. share_users_arr = []
  27. return_times_arr = []
  28. return_users_arr = []
  29. create_time_arr = []
  30. for record in records:
  31. try:
  32. videoids.append(record.videoid)
  33. title_vectors.append(BertEmbedding.text_to_vector(record.title))
  34. titles.append(record.title)
  35. preview_times_arr.append(int(record.preview_times))
  36. preview_users_arr.append(int(record.preview_users))
  37. view_times_arr.append(int(record.view_times))
  38. view_users_arr.append(int(record.view_users))
  39. play_times_arr.append(int(record.play_times))
  40. play_users_arr.append(int(record.play_users))
  41. share_times_arr.append(int(record.share_times))
  42. share_users_arr.append(int(record.share_users))
  43. return_times_arr.append(int(record.return_times))
  44. return_users_arr.append(int(record.return_users))
  45. create_time_arr.append(
  46. int(convert_to_timestamp(record.gmt_create)))
  47. except Exception as e:
  48. print(e)
  49. print(f"Got {len(videoids)} videoids embedding success")
  50. try:
  51. # 3. Insert embedding into Milvus
  52. data = [videoids, title_vectors, titles, preview_times_arr, preview_users_arr,
  53. view_times_arr, view_users_arr, play_times_arr, play_users_arr, share_times_arr,
  54. share_users_arr, return_times_arr, return_users_arr, create_time_arr]
  55. MilvusComponent.insert_vector(data)
  56. except Exception as e:
  57. print(e)
  58. print(f"Inserted {len(videoids)} videoids embedding success")
  59. if __name__ == '__main__':
  60. # for i in range(0, 2200000, 100):
  61. # y = i + 100
  62. # print(f"Inserting {i} to {y}")
  63. # insertEmbeddings(i, 100)
  64. # print(f"Inserted Over {i} to {y}")
  65. vector = BertEmbedding.text_to_vector("老人终会成为儿女的手下败将")
  66. print(vector)