"""Predict ROV for video titles via vector similarity search and export to Excel.

For each row of an input spreadsheet the title is embedded with
``text_to_vector``, the most similar documents are fetched from
``collection``, and the mean of their ``rntHeadCount / exposureCount``
ratios is written out as the predicted ROV next to the actual ROV.
"""
from typing import Optional

import pandas as pd

from BertDemo import collection, text_to_vector, List, Doc

# Cache of query results keyed by the query vector converted to a tuple.
# NOTE(review): nothing in this file currently writes to this cache (the
# store inside queryCollection is commented out), so lookups always miss.
query_cache = {}


def vector_to_tuple(vector):
    """Return *vector* as a tuple so it can be used as a dictionary key."""
    return tuple(vector)


def queryCollection(vector) -> Optional[List[Doc]]:
    """Run a top-10 similarity query for *vector* against ``collection``.

    Args:
        vector: The query embedding; must be iterable so it can be turned
            into a cache key.

    Returns:
        ``ret.output`` of the query response on success, a previously cached
        result if one exists for this vector, or ``None`` when the query
        returns no response or a non-zero ``code``.
    """
    # Reuse an earlier result when this exact vector was already queried.
    vector_tuple = vector_to_tuple(vector)
    if vector_tuple in query_cache:
        return query_cache[vector_tuple]

    # Similarity search by vector (a primary-key lookup is also possible
    # according to the original author's note).
    ret = collection.query(
        vector=vector,
        topk=10,
        # filter='playCount > 1000',
        include_vector=True,
    )
    if ret is None or ret.code != 0:
        print('查询失败')
        return None

    # NOTE(review): caching of successful results was disabled by the
    # original author; confirm before re-enabling.
    # query_cache[vector_tuple] = ret.output
    return ret.output


def calculate_rov(row):
    """Predict the ROV of one spreadsheet row from its nearest neighbours.

    The prediction is the arithmetic mean of
    ``rntHeadCount / exposureCount`` over the documents returned by
    ``queryCollection`` for the row's title.

    Args:
        row: A mapping-like row that provides a ``'title'`` entry.

    Returns:
        The predicted ROV, or ``0`` when the query yields no documents or
        the computation fails for any reason (for example a neighbour with
        an ``exposureCount`` of zero or a missing field).
    """
    print(f"title={row['title']} ")
    rov = 0
    try:
        vector = text_to_vector(row['title'])
        docs = queryCollection(vector)
        # A failed query (None) or an empty result has no neighbours to
        # average, so the prediction stays at 0.
        if docs:
            ratio_sum = sum(
                doc.fields['rntHeadCount'] / doc.fields['exposureCount']
                for doc in docs
            )
            rov = ratio_sum / len(docs)
    except Exception as exc:
        # Best-effort per row: one bad row must not abort the whole export,
        # but KeyboardInterrupt/SystemExit are deliberately not caught so a
        # long run can still be stopped.
        print(f"预测ROV计算失败: {exc!r}")
        rov = 0
    print(f"预测ROV={rov}")
    print("=====================================")
    return rov


# Disabled alternative kept from the original file: it averages
# doc.fields['rntHeadCount'] over the neighbours instead of the ROV ratio.
# def calculate_return(row):
#     print(f"title={row['title']} ")
#     vector = text_to_vector(row['title'])
#     docs = queryCollection(vector)
#     sumHeadCount = 0
#     for doc in docs:
#         sumHeadCount += doc.fields['rntHeadCount']
#     try:
#         headCount = sumHeadCount / len(docs)
#     except:
#         headCount = 0
#     print(f"预测回流人数={headCount}")
#     print("=====================================")
#     return headCount


def _actual_rov(row):
    """Return the row's '回流人数' / '曝光次数' ratio, or 0 when '曝光次数' is 0."""
    exposure = row['曝光次数']
    return row['回流人数'] / exposure if exposure != 0 else 0


def calculate_and_export(filename, output_filename='videos-result.xlsx'):
    """Read a spreadsheet, add actual and predicted ROV columns, and save it.

    Args:
        filename: Path of the Excel file to read. It must contain the
            columns ``'title'``, ``'回流人数'`` and ``'曝光次数'``.
        output_filename: Path of the Excel file to write. Defaults to
            ``'videos-result.xlsx'``, the path the script always used.
    """
    df = pd.read_excel(filename)

    # Actual ROV per row; rows with zero exposure get 0 instead of dividing.
    df['实际ROV(回流人数/曝光次数)'] = df.apply(_actual_rov, axis=1)
    df['预测ROV'] = df.apply(calculate_rov, axis=1)

    df.to_excel(output_filename, index=False)


if __name__ == '__main__':
    calculate_and_export('20231115_flow_pool.xlsx')