| 
					
				 | 
			
			
				@@ -2,7 +2,7 @@ from BertDemo import collection, text_to_vector, List, Doc 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import pandas as pd 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 # 创建一个缓存字典,用于存储查询结果 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-# query_cache = {} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+query_cache = {} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def vector_to_tuple(vector): 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -14,8 +14,8 @@ def queryCollection(vector) -> List[Doc]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 如果向量已经查询过,则直接返回结果 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     vector_tuple = vector_to_tuple(vector) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # if vector_tuple in query_cache: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    #     return query_cache[vector_tuple] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if vector_tuple in query_cache: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return query_cache[vector_tuple] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 根据向量进行相似性检索 + 条件过滤 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     ret = collection.query( 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -31,52 +31,53 @@ def queryCollection(vector) -> List[Doc]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     return ret.output 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				def calculate_rov(row):
				    """Predict the ROV for one record from the documents matching its title.

				    The title is embedded with ``text_to_vector`` and passed to
				    ``queryCollection``; the prediction is the mean of
				    ``rntHeadCount / exposureCount`` over the returned documents.

				    Args:
				        row: Mapping-like record (a DataFrame row when used through
				            ``DataFrame.apply(..., axis=1)``) providing a ``'title'`` entry.

				    Returns:
				        The mean ratio, or 0 when it cannot be computed: no documents were
				        returned, a document has a zero ``exposureCount``, or embedding /
				        querying raised an error.
				    """
				    print(f"title={row['title']} ")
				    try:
				        vector = text_to_vector(row['title'])
				        docs = queryCollection(vector)
				        ratios = [
				            doc.fields['rntHeadCount'] / doc.fields['exposureCount']
				            for doc in docs
				        ]
				        rov = sum(ratios) / len(ratios)
				    except ZeroDivisionError:
				        # Expected best-effort case: empty result set or a document with
				        # zero exposures. The prediction falls back to 0.
				        rov = 0
				    except Exception as exc:
				        # Still best-effort, but no longer a bare ``except``: Ctrl-C and
				        # SystemExit propagate, and the real failure is made visible
				        # instead of being silently reported as a prediction of 0.
				        print(f"预测ROV失败: {exc!r}")
				        rov = 0

				    print(f"预测ROV={rov}")
				    print("=====================================")
				    return rov
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-def calculate_return(row): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print(f"title={row['title']} ") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    vector = text_to_vector(row['title']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    docs = queryCollection(vector) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    sumHeadCount = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    for doc in docs: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        sumHeadCount += doc.fields['rntHeadCount'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+# def calculate_return(row): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     print(f"title={row['title']} ") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     vector = text_to_vector(row['title']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     docs = queryCollection(vector) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     sumHeadCount = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     for doc in docs: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#         sumHeadCount += doc.fields['rntHeadCount'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        headCount = sumHeadCount / len(docs) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    except: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        headCount = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#         headCount = sumHeadCount / len(docs) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     except: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#         headCount = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print(f"预测回流人数={headCount}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print("=====================================") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    return headCount 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     print(f"预测回流人数={headCount}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     print("=====================================") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+#     return headCount 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def calculate_and_export(filename): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 读取表格数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     df = pd.read_excel(filename) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # 应用计算函数并创建新的列 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    df['实际ROS(回流人数/分享次数)'] = df.apply(lambda row: row['回流人数'] / 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                      row['分享次数'], axis=1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    df['预测ROS'] = df.apply(calculate_ros, axis=1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    df['实际回流人数'] = df['回流人数'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    df['预测回流人数'] = df.apply(calculate_return, axis=1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # 应用计算函数并创建新的列, 跳过曝光次数为0的数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    df['实际ROV(回流人数/曝光次数)'] = df.apply(lambda row: row['回流人数'] / 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                      row['曝光次数'] if row['曝光次数'] != 0 else 0, axis=1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    df['预测ROV'] = df.apply(calculate_rov, axis=1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 将结果保存回表格 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     df.to_excel('videos-result.xlsx', index=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				if __name__ == '__main__':
				    # Script entry point: process the flow-pool export and write the
				    # result spreadsheet.
				    calculate_and_export('20231115_flow_pool.xlsx')
			 |