|
@@ -2,7 +2,7 @@ from BertDemo import collection, text_to_vector, List, Doc
|
|
|
import pandas as pd
|
|
|
|
|
|
# Cache dictionary used to store query results.
query_cache: dict = {}
|
|
|
|
|
|
|
|
|
def vector_to_tuple(vector):
|
|
@@ -14,8 +14,8 @@ def queryCollection(vector) -> List[Doc]:
|
|
|
# 如果向量已经查询过,则直接返回结果
|
|
|
vector_tuple = vector_to_tuple(vector)
|
|
|
|
|
|
- # if vector_tuple in query_cache:
|
|
|
- # return query_cache[vector_tuple]
|
|
|
+ if vector_tuple in query_cache:
|
|
|
+ return query_cache[vector_tuple]
|
|
|
|
|
|
# 根据向量进行相似性检索 + 条件过滤
|
|
|
ret = collection.query(
|
|
@@ -31,52 +31,53 @@ def queryCollection(vector) -> List[Doc]:
|
|
|
return ret.output
|
|
|
|
|
|
|
|
|
def calculate_rov(row):
    """Predict the ROV for one row from documents similar to its title.

    The title is converted with ``text_to_vector``, documents are fetched
    with ``queryCollection``, and the prediction is the mean of
    ``rntHeadCount / exposureCount`` over the returned documents.

    Args:
        row: A record supporting ``row['title']`` (this function is applied
            row-wise to a DataFrame).

    Returns:
        The mean ratio over the returned documents, or ``0`` when it cannot
        be computed (for example no documents were returned, a field is
        missing, or a document has ``exposureCount`` equal to zero).
    """
    print(f"title={row['title']} ")
    try:
        vector = text_to_vector(row['title'])
        docs = queryCollection(vector)
        ratios = [
            doc.fields['rntHeadCount'] / doc.fields['exposureCount']
            for doc in docs
        ]
        # An empty result raises ZeroDivisionError here and falls back to 0.
        rov = sum(ratios) / len(ratios)
    except Exception as exc:
        # Best-effort: any ordinary failure yields a prediction of 0.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate so a long batch run can still be aborted.
        print(f"ROV prediction failed: {exc!r}")
        rov = 0

    print(f"预测ROV={rov}")
    print("=====================================")
    return rov
|
|
|
|
|
|
|
|
|
-def calculate_return(row):
|
|
|
- print(f"title={row['title']} ")
|
|
|
- vector = text_to_vector(row['title'])
|
|
|
- docs = queryCollection(vector)
|
|
|
- sumHeadCount = 0
|
|
|
- for doc in docs:
|
|
|
- sumHeadCount += doc.fields['rntHeadCount']
|
|
|
+# def calculate_return(row):
|
|
|
+# print(f"title={row['title']} ")
|
|
|
+# vector = text_to_vector(row['title'])
|
|
|
+# docs = queryCollection(vector)
|
|
|
+# sumHeadCount = 0
|
|
|
+# for doc in docs:
|
|
|
+# sumHeadCount += doc.fields['rntHeadCount']
|
|
|
|
|
|
- try:
|
|
|
- headCount = sumHeadCount / len(docs)
|
|
|
- except:
|
|
|
- headCount = 0
|
|
|
+# try:
|
|
|
+# headCount = sumHeadCount / len(docs)
|
|
|
+# except:
|
|
|
+# headCount = 0
|
|
|
|
|
|
- print(f"预测回流人数={headCount}")
|
|
|
- print("=====================================")
|
|
|
- return headCount
|
|
|
+# print(f"预测回流人数={headCount}")
|
|
|
+# print("=====================================")
|
|
|
+# return headCount
|
|
|
|
|
|
|
|
|
def calculate_and_export(filename, output_filename='videos-result.xlsx'):
    """Add actual and predicted ROV columns to a workbook and export it.

    Args:
        filename: Path of the Excel workbook to read. The '回流人数' and
            '曝光次数' columns are read from it, and every row is also
            passed to ``calculate_rov``.
        output_filename: Path of the Excel workbook to write. Defaults to
            'videos-result.xlsx', the path that was previously hard-coded.
    """
    # Load the spreadsheet data.
    df = pd.read_excel(filename)

    def actual_rov(row):
        exposure = row['曝光次数']
        # Rows with an exposure count of 0 get 0 instead of dividing by zero.
        return row['回流人数'] / exposure if exposure != 0 else 0

    # Apply the calculations row by row and store them as new columns.
    df['实际ROV(回流人数/曝光次数)'] = df.apply(actual_rov, axis=1)
    df['预测ROV'] = df.apply(calculate_rov, axis=1)

    # Save the result back to a spreadsheet.
    df.to_excel(output_filename, index=False)
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Script entry point: run the export on the workbook named below.
    calculate_and_export('20231115_flow_pool.xlsx')
|