"""Predict ROS and returning head count for video titles.

Each title is converted to a vector with ``text_to_vector`` and the most
similar documents are fetched from ``collection``.  Predictions are averages
over the ``rntHeadCount`` / ``shareCount`` fields of those documents.  ROS is
the returning head count divided by the share count.
"""

from typing import Optional

import pandas as pd

from BertDemo import collection, text_to_vector, List, Doc

# Separator line printed after each per-row prediction.
_SEPARATOR = "====================================="


def vector_to_tuple(vector):
    """Return *vector* as a tuple so it is hashable (usable as a dict key)."""
    return tuple(vector)


def queryCollection(vector) -> Optional[List[Doc]]:
    """Run a top-10 similarity query against ``collection`` for *vector*.

    Args:
        vector: The query vector, passed through as ``collection.query(vector=...)``.

    Returns:
        ``ret.output`` of the query response, or ``None`` when the query
        returned nothing or reported a non-zero ``code`` (a failure message
        is printed in that case).
    """
    # NOTE: a result cache keyed by vector_to_tuple(vector) used to be
    # sketched here; it is disabled, so no tuple is built any more.
    ret = collection.query(
        vector=vector,  # vector search; a primary-key lookup is also possible
        topk=10,
        include_vector=True,
    )
    if ret is None or ret.code != 0:
        print('查询失败')
        return None
    return ret.output


def _similar_docs(row):
    """Print the row's title and return its similar docs ([] if the query failed)."""
    print(f"title={row['title']} ")
    vector = text_to_vector(row['title'])
    docs = queryCollection(vector)
    # queryCollection signals failure with None; normalise so callers can
    # iterate and take len() without crashing.
    return docs if docs is not None else []


def calculate_ros(row):
    """Predict the ROS for *row* as the mean ROS of its similar documents.

    Args:
        row: A mapping-like row that provides a ``'title'`` entry.

    Returns:
        The mean of ``rntHeadCount / shareCount`` over the similar documents.
        Documents with a zero (or missing-valued) ``shareCount`` have no
        defined ROS and are left out of the mean.  Returns ``0`` when the
        query failed or no document contributed a ratio.
    """
    docs = _similar_docs(row)
    ratios = [
        doc.fields['rntHeadCount'] / doc.fields['shareCount']
        for doc in docs
        if doc.fields['shareCount']  # skip zero share counts: ratio undefined
    ]
    ros = sum(ratios) / len(ratios) if ratios else 0
    print(f"预测ROS={ros}")
    print(_SEPARATOR)
    return ros


def calculate_return(row):
    """Predict the returning head count for *row*.

    Args:
        row: A mapping-like row that provides a ``'title'`` entry.

    Returns:
        The mean ``rntHeadCount`` of the similar documents, or ``0`` when the
        query failed or returned no documents.
    """
    docs = _similar_docs(row)
    if docs:
        head_count = sum(doc.fields['rntHeadCount'] for doc in docs) / len(docs)
    else:
        head_count = 0
    print(f"预测回流人数={head_count}")
    print(_SEPARATOR)
    return head_count


def calculate_and_export(filename, output_filename='videos-result.xlsx'):
    """Read a spreadsheet, add actual/predicted columns, and write the result.

    The input must contain the columns ``'title'``, ``'回流人数'`` and
    ``'分享次数'``.

    Args:
        filename: Path of the Excel file to read.
        output_filename: Path of the Excel file to write.  Defaults to the
            previously hard-coded ``'videos-result.xlsx'``.
    """
    df = pd.read_excel(filename)

    # Actual ROS computed column-wise instead of with a per-row lambda.
    df['实际ROS(回流人数/分享次数)'] = df['回流人数'] / df['分享次数']
    df['预测ROS'] = df.apply(calculate_ros, axis=1)
    df['实际回流人数'] = df['回流人数']
    df['预测回流人数'] = df.apply(calculate_return, axis=1)

    df.to_excel(output_filename, index=False)