12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- from BertDemo import collection, text_to_vector, List, Doc
- import pandas as pd
# Cache dictionary for storing query results (currently disabled).
# query_cache = {}
def vector_to_tuple(vector):
    """Return the items of ``vector`` as a tuple.

    Unlike a list, a tuple of hashable items is itself hashable, so the
    result can serve as a dictionary key.
    """
    as_tuple = tuple(vector)
    return as_tuple
def queryCollection(vector) -> "List[Doc] | None":
    """Fetch the documents most similar to ``vector`` from ``collection``.

    Runs a top-10 similarity query against the module-level ``collection``
    and requests that the stored vectors be included in the results.

    Args:
        vector: Query embedding, passed unchanged to ``collection.query``.

    Returns:
        ``ret.output`` of the query response on success, or ``None`` when the
        query returns nothing or reports a non-zero status code. Callers
        must handle the ``None`` case before iterating.
    """
    # NOTE: result caching (keyed by ``vector_to_tuple(vector)``) was disabled
    # by the original author, so the tuple conversion that only fed that
    # cache is no longer computed here.
    ret = collection.query(
        vector=vector,  # similarity search by vector
        topk=10,
        # A ``filter='playCount > 1000'`` argument was left disabled here.
        include_vector=True,
    )
    if ret is None or ret.code != 0:
        print('查询失败')
        return None
    return ret.output
def calculate_ros(row):
    """Predict the ROS for ``row`` from the documents most similar to its title.

    The title is embedded with ``text_to_vector`` and looked up with
    ``queryCollection``. For each returned document the ratio
    ``rntHeadCount / shareCount`` is computed, and the prediction is the
    mean of those ratios.

    Args:
        row: Mapping-like record with a ``'title'`` entry (e.g. a DataFrame
            row passed by ``DataFrame.apply(..., axis=1)``).

    Returns:
        The mean ratio, or ``0`` when the query failed, returned no
        documents, or no document had a non-zero ``shareCount``.
    """
    print(f"title={row['title']} ")
    vector = text_to_vector(row['title'])
    docs = queryCollection(vector)
    if docs is None:
        # queryCollection signals a failed query with None; treat it as
        # "no neighbours" instead of crashing on iteration.
        docs = []

    # Documents with a zero/empty shareCount have no defined ratio, so they
    # are left out of the average rather than raising ZeroDivisionError.
    ratios = [
        doc.fields['rntHeadCount'] / doc.fields['shareCount']
        for doc in docs
        if doc.fields['shareCount']
    ]
    ros = sum(ratios) / len(ratios) if ratios else 0
    print(f"预测ROS={ros}")
    print("=====================================")
    return ros
def calculate_return(row):
    """Predict the return head count for ``row`` from similar documents.

    The title is embedded with ``text_to_vector`` and looked up with
    ``queryCollection``; the prediction is the mean ``rntHeadCount`` of the
    returned documents.

    Args:
        row: Mapping-like record with a ``'title'`` entry (e.g. a DataFrame
            row passed by ``DataFrame.apply(..., axis=1)``).

    Returns:
        The mean ``rntHeadCount``, or ``0`` when the query failed or
        returned no documents.
    """
    print(f"title={row['title']} ")
    vector = text_to_vector(row['title'])
    docs = queryCollection(vector)
    if docs is None:
        # queryCollection signals a failed query with None; treat it as
        # "no neighbours" instead of crashing on iteration.
        docs = []

    if len(docs) > 0:
        head_count = sum(doc.fields['rntHeadCount'] for doc in docs) / len(docs)
    else:
        head_count = 0
    print(f"预测回流人数={head_count}")
    print("=====================================")
    return head_count
def calculate_and_export(filename, output_filename='videos-result.xlsx'):
    """Add actual and predicted metrics to a spreadsheet and save the result.

    Reads ``filename`` with ``pd.read_excel``, adds four columns (actual
    ROS, predicted ROS, actual return head count, predicted return head
    count) and writes the frame to ``output_filename`` without the index.

    Args:
        filename: Path of the Excel file to read. The code reads the
            columns ``'回流人数'`` and ``'分享次数'`` directly, and
            ``calculate_ros`` / ``calculate_return`` read ``'title'``.
        output_filename: Path of the Excel file to write. Defaults to
            ``'videos-result.xlsx'``, the path that was previously
            hard-coded.
    """
    # Load the input table.
    df = pd.read_excel(filename)

    # Actual ROS = return head count / share count. Zero share counts are
    # masked to NaN first so such rows get an empty cell instead of raising
    # ZeroDivisionError or producing inf.
    shares = df['分享次数']
    df['实际ROS(回流人数/分享次数)'] = df['回流人数'] / shares.where(shares != 0)

    df['预测ROS'] = df.apply(calculate_ros, axis=1)
    df['实际回流人数'] = df['回流人数']
    df['预测回流人数'] = df.apply(calculate_return, axis=1)

    # Write the augmented table back out.
    df.to_excel(output_filename, index=False)
|