"""Annotate a spreadsheet of videos with their actual ROV and similar videos.

For every row of the input workbook the script computes
``return_cnt / view_cnt`` and looks up the videos whose title embedding is
closest to the row's title, then writes the enriched table to a new workbook.
"""

import BertEmbedding
import MilvusComponent
import pandas as pd

# Entity fields requested from the vector store for every hit.
_OUTPUT_FIELDS = [
    'title',
    'preview_users', 'preview_times',
    'view_users', 'view_times',
    'play_users', 'play_times',
    'share_users', 'share_times',
    'return_users', 'return_times',
]


def queryCollection(vector):
    """Run a similarity search with a scalar filter for one query vector.

    Args:
        vector: The query embedding, as produced by
            ``BertEmbedding.text_to_vector``.

    Returns:
        Whatever ``MilvusComponent.query`` returns. The caller in this file
        indexes it as ``results[0]`` to get the hits for the single query
        vector.
    """
    # NOTE(review): the id cutoff in `expr` is hard-coded; presumably it limits
    # the search to a recent range of videos - confirm before reusing.
    return MilvusComponent.query(
        vector=[vector],
        top_k=10,
        expr="id >= 15408599",
        output_fields=_OUTPUT_FIELDS,
    )


def calculate_and_export(filename, output_filename='videos-result.xlsx'):
    """Compute per-row ROV, attach similar videos, and export to Excel.

    Args:
        filename: Path of the Excel workbook to read. The code reads the
            ``title``, ``return_cnt`` and ``view_cnt`` columns.
        output_filename: Path of the workbook to write. Defaults to
            ``'videos-result.xlsx'``, the path the function always used before
            this parameter existed.

    Returns:
        None. The enriched table is written to ``output_filename``.
    """
    # Load the spreadsheet.
    df = pd.read_excel(filename)

    # Walk every row; new columns are created on first assignment via df.at.
    for index, row in df.iterrows():
        print(f"title={row['title']} ")

        # Actual ROV; rows with zero views get 0 instead of dividing by zero.
        view_cnt = row['view_cnt']
        actual_rov = (row['return_cnt'] / view_cnt) if view_cnt != 0 else 0
        df.at[index, '实际ROV(回流人数/曝光次数)'] = actual_rov

        # Similar-video lookup. Deliberately best-effort: a failure for one
        # title is printed and the remaining rows are still processed.
        try:
            vector = BertEmbedding.text_to_vector(row['title'])
            results = queryCollection(vector)
            # results[0] holds the hits for the single query vector; column
            # suffixes are 1-based ranks.
            for rank, hit in enumerate(results[0], start=1):
                entity = hit.entity
                df.at[index, f'相似视频ID{rank}'] = hit.id
                df.at[index, f'相似标题{rank}'] = entity.get('title')
                df.at[index, f'相似曝光{rank}'] = entity.get('view_times')
                df.at[index, f'相似分享{rank}'] = entity.get('share_times')
                df.at[index, f'相似回流{rank}'] = entity.get('return_users')
                df.at[index, f'相似内积{rank}'] = hit.distance
        except Exception as e:
            print(e)

    print("Done=====================================")
    # Save the enriched table to a new workbook.
    df.to_excel(output_filename, index=False)


if __name__ == '__main__':
    calculate_and_export('20231124_flowpool.xlsx')
    # Ad-hoc check for a single title:
    # vector = BertEmbedding.text_to_vector('印度种姓制度有多可怕?看完这些你就知道了')
    # results = queryCollection(vector)
    # print(results)