# Query.py
  1. import BertEmbedding
  2. import MilvusComponent
  3. import pandas as pd
  4. def queryCollection(vector):
  5. # 根据向量进行相似性检索 + 条件过滤
  6. results = MilvusComponent.query(
  7. vector=[vector],
  8. top_k=10,
  9. expr="id >= 15408599",
  10. output_fields=['title', 'preview_users', 'preview_times', 'view_users', 'view_times',
  11. 'play_users', 'play_times', 'share_users', 'share_times', 'return_users', 'return_times']
  12. )
  13. return results
  14. def calculate_and_export(filename):
  15. # 读取表格数据
  16. df = pd.read_excel(filename)
  17. # 遍历DataFrame的每一行
  18. for index, row in df.iterrows():
  19. print(f"title={row['title']} ")
  20. # 计算实际ROV
  21. actual_rov = row['return_cnt'] / \
  22. row['view_cnt'] if row['view_cnt'] != 0 else 0
  23. df.at[index, '实际ROV(回流人数/曝光次数)'] = actual_rov
  24. # 计算预测ROV
  25. try:
  26. vector = BertEmbedding.text_to_vector(row['title'])
  27. results = queryCollection(vector)
  28. for i in range(len(results[0])):
  29. hit = results[0][i]
  30. df.at[index, '相似视频ID'+str(i+1)] = hit.id
  31. df.at[index, '相似标题'+str(i+1)] = hit.entity.get('title')
  32. df.at[index, '相似曝光' +
  33. str(i+1)] = hit.entity.get('view_times')
  34. df.at[index, '相似分享' +
  35. str(i+1)] = hit.entity.get('share_times')
  36. df.at[index, '相似回流' +
  37. str(i+1)] = hit.entity.get('return_users')
  38. df.at[index, '相似内积'+str(i+1)] = hit.distance
  39. except Exception as e:
  40. print(e)
  41. print("Done=====================================")
  42. # 将结果保存回表格
  43. df.to_excel('videos-result.xlsx', index=False)
  44. if __name__ == '__main__':
  45. calculate_and_export('20231124_flowpool.xlsx')
  46. # vector = BertEmbedding.text_to_vector('印度种姓制度有多可怕?看完这些你就知道了')
  47. # results = queryCollection(vector)
  48. # print(results)