
Merge branch 'feature_2023101111_liqian_add_abtest' into pre-master

liqian 1 year ago
parent
commit
52252318b9

+ 47 - 29
config.py

@@ -397,6 +397,19 @@ class BaseConfig(object):
             # Other tables and the regional hourly table: no dedup here; recall dedups online
             'rule19': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
                        'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2, 'dup_remove': False},
+            # Score formula: add features for current-hour returns from h-2 shares and from h-3 shares
+            # score = k2 * sharerate * (backrate * LOG(lastonehour_return+1) + backrate_2 * LOG(lasttwohour_return_now+1) + backrate_3 * LOG(lastthreehour_return_now+1))
+            'rule20': {'view_type': 'video-show-region', 'platform_return_rate': 0.001, 'region_24h_rule_key': 'rule2',
+                       '24h_rule_key': 'rule3', 'score_func': 'add_backrate*log(return+1)'},
+            # Score formula: add a return-retention feature over [h-3, h-2]
+            # score = k2 * sharerate * backrate * LOG(lastonehour_return+1) * return retention over the previous two hours
+            'rule21': {'view_type': 'video-show-region', 'platform_return_rate': 0.001, 'region_24h_rule_key': 'rule2',
+                       '24h_rule_key': 'rule3', 'score_func': 'multiply_return_retention'},
+            # Score formula: add features (current-hour returns from h-2 shares / h-2 shares) and (current-hour returns from h-3 shares / h-3 shares)
+            # score = k2 * sharerate * (backrate + backrate * backrate_2 * backrate_3) * LOG(lastonehour_return+1)
+            'rule22': {'view_type': 'video-show-region', 'platform_return_rate': 0.001, 'region_24h_rule_key': 'rule2',
+                       '24h_rule_key': 'rule3', 'score_func': 'update_backrate'},
+
         },
         'data_params': DATA_PARAMS,
         'params_list': [
@@ -427,6 +440,9 @@ class BaseConfig(object):
             # {'data': 'data1', 'rule': 'rule17'},  # 215 vlog
             # {'data': 'data1', 'rule': 'rule18'},  # 224 vlog
             {'data': 'videos5', 'rule': 'rule7-1'},  # 428 [内容精选]
+            {'data': 'data1', 'rule': 'rule20'},  # 461 vlog score formula: add current-hour return features from h-2 and h-3 shares
+            {'data': 'data1', 'rule': 'rule21'},  # 462 vlog score formula: add return-retention feature over [h-3, h-2]
+            {'data': 'data1', 'rule': 'rule22'},  # 463 vlog score formula: add (current-hour returns from h-2 shares / h-2 shares) and (current-hour returns from h-3 shares / h-3 shares) features
         ],
         'params_list_new': [
             {'data': 'data10', 'rule': 'rule19'},  # 316 票圈视频 + recall dedups online
@@ -630,6 +646,8 @@ class BaseConfig(object):
     # appType = 6, ROV recall pool redis key prefix; full format: com.weiqu.video.recall.hot.apptype.h.item.score.{appType}.{h}
     # RECALL_KEY_NAME_PREFIX_APP_TYPE = 'com.weiqu.video.recall.hot.apptype.h.item.score.'
 
+    # Redis key holding the flow-pool distribution A/B test config (control group vs. experimental group split)
+    FLOWPOOL_ABTEST_KEY_NAME = 'flow:pool:abtest:config'
     # Redis key prefix for flow-pool offline model results; full format: flow:pool:item:score:{appType}
     FLOWPOOL_KEY_NAME_PREFIX = 'flow:pool:item:score:'
     # Redis key prefix for quick-exposure flow-pool data; full format: flow:pool:quick:item:score:{appType}:{flowPool_id}
@@ -1565,48 +1583,48 @@ class BaseConfig(object):
             'max_update_step': 5,  # maximum number of adjustment steps
             # adjustment step size
             'threshold_update': {
-                'ab0': 1 / 144,
-                'ab1': 1 / 144,
-                'ab2': 1 / 144,
-                'ab3': 1 / 144,
-                'ab4': 1 / 144,
-                'ab5': 1 / 144,
-                'ab6': 1 / 144,
-                'ab7': 1 / 144,
-                'ab8': 1 / 144,
-                'ab9': 1 / 144,
+                'ab0': 1 / 48,
+                'ab1': 1 / 288,
+                'ab2': 1 / 288,
+                'ab3': 1 / 288,
+                'ab4': 1 / 288,
+                'ab5': 1 / 288,
+                'ab6': 1 / 288,
+                'ab7': 1 / 288,
+                'ab8': 1 / 288,
+                'ab9': 1 / 288,
             },
             # per-time-slot target UV control parameters
             'target_uv_param': {
                 'ab0': {
                     'update_hours': list(range(7)), 'update_param': 0,
                     'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.01,
-                                              'special_max_update_step': 38}
+                                              'special_max_update_step': 23}
                 },  # 00:00-07:00, cap UV at 0%
                 'ab1': {
                     'update_hours': list(range(7)), 'update_param': 0,
-                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.01,
-                                              'special_max_update_step': 38}
+                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.005,
+                                              'special_max_update_step': 71}
                 },  # 00:00-07:00, cap UV at 0%
                 'ab2': {
                     'update_hours': list(range(7)), 'update_param': 0,
-                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.01,
-                                              'special_max_update_step': 38}
+                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.005,
+                                              'special_max_update_step': 71}
                 },  # 00:00-07:00, cap UV at 0%
                 'ab3': {
                     'update_hours': list(range(7)), 'update_param': 0,
-                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.01,
-                                              'special_max_update_step': 38}
+                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.005,
+                                              'special_max_update_step': 71}
                 },  # 00:00-07:00, cap UV at 0%
                 'ab4': {
                     'update_hours': list(range(7)), 'update_param': 0,
-                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.01,
-                                              'special_max_update_step': 38}
+                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.005,
+                                              'special_max_update_step': 71}
                 },  # 00:00-07:00, cap UV at 0%
                 'ab5': {
                     'update_hours': list(range(7)), 'update_param': 0,
-                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.01,
-                                              'special_max_update_step': 38}
+                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.005,
+                                              'special_max_update_step': 71}
                 },  # 00:00-07:00, cap UV at 0%
                 # 'ab6': {
                 #     'update_hours': list(range(9)), 'update_param': 1/2,
@@ -1615,23 +1633,23 @@ class BaseConfig(object):
                 # },  # 00:00-09:00, cap UV at 1/2 of the target UV
                 'ab6': {
                     'update_hours': list(range(7)), 'update_param': 0,
-                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.01,
-                                              'special_max_update_step': 38}
+                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.005,
+                                              'special_max_update_step': 71}
                 },  # 00:00-07:00, cap UV at 0% of the target UV
                 'ab7': {
                     'update_hours': list(range(7)), 'update_param': 0,
-                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.01,
-                                              'special_max_update_step': 38}
+                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.005,
+                                              'special_max_update_step': 71}
                 },  # 00:00-09:00, cap UV at 0%
                 'ab8': {
                     'update_hours': list(range(7)), 'update_param': 0,
-                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.01,
-                                              'special_max_update_step': 38}
+                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.005,
+                                              'special_max_update_step': 71}
                 },  # 00:00-07:00, cap UV at 0%
                 'ab9': {
                     'update_hours': list(range(7)), 'update_param': 0,
-                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.01,
-                                              'special_max_update_step': 38}
+                    'special_update_config': {'special_hours': [0, 1, 7, 8], 'special_gradient': 0.005,
+                                              'special_max_update_step': 71}
                 },  # 00:00-07:00, cap UV at 0%
             },
         },

+ 4 - 4
flow_pool_task.sh

@@ -1,11 +1,11 @@
 source /etc/profile
 echo $ROV_OFFLINE_ENV
 if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
-#    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/pool_predict.py
+    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/pool_predict.py
 #    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/flowpool_data_update.py
-    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/flowpool_data_update_with_level.py
+#    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/flowpool_data_update_with_level.py
 elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
-#    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/pool_predict.py
+    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/pool_predict.py
 #    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/flowpool_data_update.py
-    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/flowpool_data_update_with_level.py
+#    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/flowpool_data_update_with_level.py
 fi

+ 24 - 0
flowpool_abtest_config_update.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+# @ModuleName: flowpool_abtest_config_update
+# @Author: Liqian
+# @Time: 2023/9/23 13:31
+# @Software: PyCharm
+import json
+from config import set_config
+from log import Log
+from db_helper import RedisHelper
+
+config_, _ = set_config()
+log_ = Log()
+
+flow_pool_abtest_config = {
+    'control_group': [8, 10, 12, 14, 16],
+    'experimental_flow_set_level': [7, 9, 11, 13, 15]
+}
+log_.info(f"flow_pool_abtest_config: {flow_pool_abtest_config}")
+redis_helper = RedisHelper()
+redis_helper.set_data_to_redis(
+    key_name=config_.FLOWPOOL_ABTEST_KEY_NAME, value=json.dumps(flow_pool_abtest_config), expire_time=365 * 24 * 3600
+)
+redis_helper.persist_key(key_name=config_.FLOWPOOL_ABTEST_KEY_NAME)
+log_.info(f"flow_pool_abtest_config update finished!")
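
For reference, a minimal sketch of the read side of this key, mirroring the lookups added to pool_predict.py and flowpool_data_update_with_level.py later in this commit (it assumes the same RedisHelper API and falls back to an empty config when the key is missing):

    import json
    from config import set_config
    from db_helper import RedisHelper

    config_, _ = set_config()
    raw = RedisHelper().get_data_from_redis(key_name=config_.FLOWPOOL_ABTEST_KEY_NAME)
    abtest_config = json.loads(raw) if raw is not None else {}
    control_ids = abtest_config.get('control_group', [])                     # consumed by pool_predict.py
    experimental_ids = abtest_config.get('experimental_flow_set_level', [])  # consumed by flowpool_data_update_with_level.py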

+ 27 - 2
flowpool_data_update_with_level.py

@@ -4,6 +4,7 @@ import time
 import os
 import traceback
 import random
+import json
 
 from config import set_config
 from utils import request_post, filter_video_status, send_msg_to_feishu, filter_video_status_app, \
@@ -142,7 +143,7 @@ def online_flow_pool_data_to_redis(app_type, video_ids_set, video_info_data):
                                                        expire_time=3600)
 
 
-def get_flow_pool_data(app_type, video_info_list):
+def get_flow_pool_data(app_type, video_info_list, flow_pool_id_list):
     """
     Fetch distributable videos from the flow pool and upload the result to Redis
     :param app_type: product identifier, type-int
@@ -161,6 +162,10 @@ def get_flow_pool_data(app_type, video_info_list):
         log_.info('流量池中视频数:{}'.format(len(videos)))
         mapping = {}
         for video in videos:
+            flow_pool_id = video['flowPoolId']
+            if int(flow_pool_id) not in flow_pool_id_list:
+                continue
+            # print(f"flow_pool_id: {flow_pool_id}")
             video_id = video['videoId']
             video_ids.add(video_id)
             item_info = {'flowPool': video['flowPool'], 'level': video['level']}
@@ -168,6 +173,7 @@ def get_flow_pool_data(app_type, video_info_list):
                 mapping[video_id].append(item_info)
             else:
                 mapping[video_id] = [item_info]
+        log_.info(f"需更新流量池视频数: {len(video_ids)}")
 
         # Filter by video status
         if app_type == config_.APP_TYPE['APP']:
@@ -226,8 +232,10 @@ def get_flow_pool_data(app_type, video_info_list):
                                                expire_time=15 * 60)
 
         # Write regular flow-pool videos to Redis, stored by level
+        level_list = []
         for level, videos in redis_data.items():
             log_.info(f"level: {level}, videos_count: {len(videos)}")
+            level_list.append(level)
             flow_pool_key_name = f"{config_.FLOWPOOL_KEY_NAME_PREFIX_SET_LEVEL}{app_type}:{level}"
             # If the key already exists, delete it
             if redis_helper.key_exists(flow_pool_key_name):
@@ -236,6 +244,14 @@ def get_flow_pool_data(app_type, video_info_list):
             if videos:
                 redis_helper.add_data_with_set(key_name=flow_pool_key_name, values=videos, expire_time=24 * 3600)
 
+        # Delete level keys that currently have no data
+        for i in range(1, 7):
+            if i not in level_list:
+                flow_pool_key_name = f"{config_.FLOWPOOL_KEY_NAME_PREFIX_SET_LEVEL}{app_type}:{i}"
+                # If the key already exists, delete it
+                if redis_helper.key_exists(flow_pool_key_name):
+                    redis_helper.del_keys(flow_pool_key_name)
+
         log_.info('data to redis finished!')
 
         return video_info_list
@@ -278,6 +294,14 @@ if __name__ == '__main__':
     time.sleep(60)
     app_type_list = [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]
     log_.info('flow pool predict start...')
+    # Fetch the flow-pool id list for this group
+    redis_helper = RedisHelper()
+    flow_pool_abtest_config = redis_helper.get_data_from_redis(key_name=config_.FLOWPOOL_ABTEST_KEY_NAME)
+    if flow_pool_abtest_config is not None:
+        flow_pool_abtest_config = json.loads(flow_pool_abtest_config)
+    else:
+        flow_pool_abtest_config = {}
+    flow_pool_id_list = flow_pool_abtest_config.get('experimental_flow_set_level', [])
     video_info_list = []
     for app_name, app_type in config_.APP_TYPE.items():
         log_.info('{} predict start...'.format(app_name))
@@ -286,7 +310,8 @@ if __name__ == '__main__':
         elif app_type == config_.APP_TYPE['ZUI_JING_QI']:
             continue
         else:
-            video_info_list = get_flow_pool_data(app_type=app_type, video_info_list=video_info_list)
+            video_info_list = get_flow_pool_data(app_type=app_type, video_info_list=video_info_list,
+                                                 flow_pool_id_list=flow_pool_id_list)
         log_.info('{} predict end...'.format(app_name))
 
     # Update remaining distribution counts

+ 11 - 0
flowpool_data_update_with_level_task.sh

@@ -0,0 +1,11 @@
+source /etc/profile
+echo $ROV_OFFLINE_ENV
+if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
+#    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/pool_predict.py &
+#    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/flowpool_data_update.py
+    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/flowpool_data_update_with_level.py
+elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
+#    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/pool_predict.py
+#    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/flowpool_data_update.py
+    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/flowpool_data_update_with_level.py
+fi

+ 1 - 0
flowpool_level_weight_update.py

@@ -20,4 +20,5 @@ redis_helper = RedisHelper()
 redis_helper.set_data_to_redis(
     key_name=config_.FLOWPOOL_LEVEL_WEIGHT_KEY_NAME, value=json.dumps(flowpool_level_weight), expire_time=365 * 24 *3600
 )
+redis_helper.persist_key(key_name=config_.FLOWPOOL_LEVEL_WEIGHT_KEY_NAME)  # drop the TTL set above so the key never expires
 log_.info(f"flowpool_level_weight update finished!")
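
The set-then-persist pattern added above (write the value with a TTL, then drop the TTL) corresponds to Redis SET with EX followed by PERSIST. A rough equivalent in plain redis-py, with an illustrative key name and connection parameters (the actual RedisHelper wrapper and key value are not part of this diff):

    import json
    import redis

    r = redis.Redis(host='localhost', port=6379, db=0)       # placeholder connection params
    key = 'example:flowpool:level:weight'                    # illustrative key name only
    r.set(key, json.dumps({'level_weight_example': 1}), ex=365 * 24 * 3600)
    r.persist(key)                                           # remove the TTL so the key never expires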

+ 30 - 3
pool_predict.py

@@ -1,4 +1,5 @@
 import datetime
+import json
 import random
 import time
 import os
@@ -137,29 +138,41 @@ def online_flow_pool_data_to_redis(app_type, video_ids_set, video_info_data):
                                                        expire_time=3600)
 
 
-def predict(app_type, video_info_list):
+def predict(app_type, video_info_list, flow_pool_id_list):
     """
     Rank flow-pool videos and upload the result to Redis
     :param app_type: product identifier, type-int
     :return: None
     """
     try:
+        redis_helper = RedisHelper()
+        flow_pool_key_name = f"{config_.FLOWPOOL_KEY_NAME_PREFIX}{app_type}"
+        quick_flow_pool_key_name = f"{config_.QUICK_FLOWPOOL_KEY_NAME_PREFIX}{app_type}:{config_.QUICK_FLOW_POOL_ID}"
         # Fetch data from the flow pool
         videos = get_videos_from_flow_pool(app_type=app_type)
         if len(videos) <= 0:
             log_.info('流量池中无需分发的视频')
+            # Clear stale data
+            for key in [flow_pool_key_name, quick_flow_pool_key_name]:
+                if redis_helper.key_exists(key):
+                    redis_helper.del_keys(key)
             return video_info_list
         # Map video_id to flow_pool
         video_ids = set()
         log_.info('流量池中视频数:{}'.format(len(videos)))
         mapping = {}
         for video in videos:
+            flow_pool_id = video['flowPoolId']
+            if int(flow_pool_id) not in flow_pool_id_list:
+                continue
+            # print(f"flow_pool_id: {flow_pool_id}")
             video_id = video['videoId']
             video_ids.add(video_id)
             if video_id in mapping:
                 mapping[video_id].append(video['flowPool'])
             else:
                 mapping[video_id] = [video['flowPool']]
+        log_.info(f"需更新流量池视频数: {len(video_ids)}")
 
         # Filter by video status
         if app_type == config_.APP_TYPE['APP']:
@@ -174,6 +187,10 @@ def predict(app_type, video_info_list):
 
         if not filtered_videos:
             log_.info('流量池中视频状态不符合分发')
+            # Clear stale data
+            for key in [flow_pool_key_name, quick_flow_pool_key_name]:
+                if redis_helper.key_exists(key):
+                    redis_helper.del_keys(key)
             return video_info_list
 
         # Predict
@@ -213,7 +230,6 @@ def predict(app_type, video_info_list):
                 video_info_data['flow_pool'][video_id] = flow_pool_list
 
         # Write quick-exposure flow-pool videos to Redis
-        redis_helper = RedisHelper()
         quick_flow_pool_key_name = f"{config_.QUICK_FLOWPOOL_KEY_NAME_PREFIX}{app_type}:{config_.QUICK_FLOW_POOL_ID}"
         # If the key already exists, delete it
         if redis_helper.key_exists(quick_flow_pool_key_name):
@@ -375,8 +391,18 @@ def predict_19(app_type):
 
 
 if __name__ == '__main__':
+    # Wait ~1 min so data for the first app_type is complete
+    time.sleep(50)
     app_type_list = [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]
     log_.info('flow pool predict start...')
+    # Fetch the flow-pool id list for this group
+    redis_helper = RedisHelper()
+    flow_pool_abtest_config = redis_helper.get_data_from_redis(key_name=config_.FLOWPOOL_ABTEST_KEY_NAME)
+    if flow_pool_abtest_config is not None:
+        flow_pool_abtest_config = json.loads(flow_pool_abtest_config)
+    else:
+        flow_pool_abtest_config = {}
+    flow_pool_id_list = flow_pool_abtest_config.get('control_group', [])
     video_info_list = []
     for app_name, app_type in config_.APP_TYPE.items():
         log_.info('{} predict start...'.format(app_name))
@@ -387,7 +413,8 @@ if __name__ == '__main__':
             # predict_19(app_type=app_type)
             continue
         else:
-            video_info_list = predict(app_type=app_type, video_info_list=video_info_list)
+            video_info_list = predict(app_type=app_type, video_info_list=video_info_list,
+                                      flow_pool_id_list=flow_pool_id_list)
         log_.info('{} predict end...'.format(app_name))
 
     # Update remaining distribution counts

+ 126 - 1
region_rule_rank_h.py

@@ -45,6 +45,12 @@ features = [
     'platform_return',
     'lastonehour_show',  # not split by region
     'lastonehour_show_region',  # grouped by region
+    'lasttwohour_share',  # number of sharers in hour h-2
+    'lasttwohour_return_now',  # returns in the past hour from h-2 shares
+    'lasttwohour_return',  # returns in hour h-2 from h-2 shares
+    'lastthreehour_share',  # number of sharers in hour h-3
+    'lastthreehour_return_now',  # returns in the past hour from h-3 shares
+    'lastthreehour_return',  # returns in hour h-3 from h-3 shares
 ]
 
 
@@ -125,7 +131,7 @@ def get_feature_data(project, table, now_date):
     return feature_df
 
 
-def cal_score(df, param):
+def cal_score_initial(df, param):
     """
     Compute score
     :param df: feature data
@@ -166,6 +172,125 @@ def cal_score(df, param):
     return df
 
 
+def cal_score_add_return(df, param):
+    # score formula: sharerate*(backrate*logback + backrate2*logback_now2 + backrate3*logback_now3)*ctr
+    # sharerate = lastonehour_share/(lastonehour_play+1000)
+    # backrate = lastonehour_return/(lastonehour_share+10)
+    # backrate2 = lasttwohour_return_now/(lasttwohour_share+10)
+    # backrate3 = lastthreehour_return_now/(lastthreehour_share+10)
+    # ctr = lastonehour_play/(lastonehour_preview+1000); cap ctr: K2 = 0.6 if ctr > 0.6 else ctr
+    # score = k2 * sharerate * (backrate * LOG(lastonehour_return+1) + backrate_2 * LOG(lasttwohour_return_now+1) + backrate_3 * LOG(lastthreehour_return_now+1))
+
+    df = df.fillna(0)
+    df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
+    df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
+    df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)
+    df['back_rate2'] = df['lasttwohour_return_now'] / (df['lasttwohour_share'] + 10)
+    df['log_back2'] = (df['lasttwohour_return_now'] + 1).apply(math.log)
+    df['back_rate3'] = df['lastthreehour_return_now'] / (df['lastthreehour_share'] + 10)
+    df['log_back3'] = (df['lastthreehour_return_now'] + 1).apply(math.log)
+
+    if param.get('view_type', None) == 'video-show':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show'] + 1000)
+    elif param.get('view_type', None) == 'video-show-region':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show_region'] + 1000)
+    else:
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_preview'] + 1000)
+    df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
+
+    df['score'] = df['K2'] * df['share_rate'] * (
+            df['back_rate'] * df['log_back'] +
+            df['back_rate2'] * df['log_back2'] +
+            df['back_rate3'] * df['log_back3']
+    )
+
+    df = df.sort_values(by=['score'], ascending=False)
+    return df
+
+
+def cal_score_multiply_return_retention(df, param):
+    # score formula: k2 * sharerate * backrate * LOG(lastonehour_return+1) * return retention over the previous two hours
+    # sharerate = lastonehour_share/(lastonehour_play+1000)
+    # backrate = lastonehour_return/(lastonehour_share+10)
+    # ctr = lastonehour_play/(lastonehour_preview+1000); cap ctr: K2 = 0.6 if ctr > 0.6 else ctr
+    # return retention over the previous two hours: return_retention_initial = (lasttwohour_return_now + lastthreehour_return_now)/(lasttwohour_return + lastthreehour_return + 1)
+    # return_retention = 0.5 if return_retention_initial == 0 else return_retention_initial
+    # score = k2 * sharerate * backrate * LOG(lastonehour_return+1) * return_retention
+
+    df = df.fillna(0)
+    df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
+    df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
+    df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)
+
+    if param.get('view_type', None) == 'video-show':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show'] + 1000)
+    elif param.get('view_type', None) == 'video-show-region':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show_region'] + 1000)
+    else:
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_preview'] + 1000)
+    df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
+
+    df['return_retention_initial'] = (df['lasttwohour_return_now'] + df['lastthreehour_return_now']) / \
+                                     (df['lasttwohour_return'] + df['lastthreehour_return'] + 1)
+    df['return_retention'] = df['return_retention_initial'].apply(lambda x: 0.5 if x == 0 else x)
+
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
+
+    df['score'] = df['K2'] * df['share_rate'] * df['back_rate'] * df['log_back'] * df['return_retention']
+
+    df = df.sort_values(by=['score'], ascending=False)
+    return df
+
+
+def cal_score_update_backrate(df, param):
+    # score formula: k2 * sharerate * (backrate + backrate * backrate_2 * backrate_3) * LOG(lastonehour_return+1)
+    # sharerate = lastonehour_share/(lastonehour_play+1000)
+    # backrate = lastonehour_return/(lastonehour_share+10)
+    # backrate2 = lasttwohour_return_now/(lasttwohour_share+10)
+    # backrate3 = lastthreehour_return_now/(lastthreehour_share+10)
+    # ctr = lastonehour_play/(lastonehour_preview+1000); cap ctr: K2 = 0.6 if ctr > 0.6 else ctr
+    # backrate1_3_initial = backrate * backrate_2 * backrate_3
+    # backrate1_3 = 0.02 if backrate1_3_initial == 0 else backrate1_3_initial
+    # score = k2 * sharerate * (backrate + backrate1_3) * LOG(lastonehour_return+1)
+
+    df = df.fillna(0)
+    df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
+    df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
+    df['back_rate2'] = df['lasttwohour_return_now'] / (df['lasttwohour_share'] + 10)
+    df['back_rate3'] = df['lastthreehour_return_now'] / (df['lastthreehour_share'] + 10)
+    df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)
+
+    if param.get('view_type', None) == 'video-show':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show'] + 1000)
+    elif param.get('view_type', None) == 'video-show-region':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show_region'] + 1000)
+    else:
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_preview'] + 1000)
+    df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
+
+    df['backrate1_3_initial'] = df['back_rate'] * df['back_rate2'] * df['back_rate3']
+    df['backrate1_3'] = df['backrate1_3_initial'].apply(lambda x: 0.02 if x == 0 else x)
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
+
+    df['score'] = df['K2'] * df['share_rate'] * (df['back_rate'] + df['backrate1_3']) * df['log_back']
+
+    df = df.sort_values(by=['score'], ascending=False)
+    return df
+
+
+def cal_score(df, param):
+    if param.get('score_func', None) == 'add_backrate*log(return+1)':
+        df = cal_score_add_return(df=df, param=param)
+    elif param.get('score_func', None) == 'multiply_return_retention':
+        df = cal_score_multiply_return_retention(df=df, param=param)
+    elif param.get('score_func', None) == 'update_backrate':
+        df = cal_score_update_backrate(df=df, param=param)
+    else:
+        df = cal_score_initial(df=df, param=param)
+    return df
+
+
 def add_func1(initial_df, pre_h_df):
     """当前小时级数据与前几个小时数据合并"""
     score_list = initial_df['score'].to_list()
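
To make the new dispatch concrete, a small hypothetical driver showing how the score_func values configured for rule20/rule21/rule22 in config.py select one of the scoring functions above; column names follow this file's features list, and the numbers are invented:

    import pandas as pd
    from region_rule_rank_h import cal_score  # assumption: run alongside this module

    param = {'view_type': 'video-show-region', 'score_func': 'multiply_return_retention'}  # as in rule21
    df = pd.DataFrame({
        'lastonehour_share': [120], 'lastonehour_play': [9000], 'lastonehour_return': [40],
        'lastonehour_show_region': [20000], 'lasttwohour_return_now': [15],
        'lastthreehour_return_now': [8], 'lasttwohour_return': [30],
        'lastthreehour_return': [25], 'platform_return': [10],
    })
    scored = cal_score(df, param)   # dispatches to cal_score_multiply_return_retention
    print(scored[['score', 'return_retention']])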

+ 1 - 1
videos_filter.py

@@ -1081,7 +1081,7 @@ def main():
         # appType = 19, filter ROV recall pool videos
         # filter_rov_pool(app_type=config_.APP_TYPE['ZUI_JING_QI'])
         # Filter flow-pool videos
-        # filter_flow_pool()
+        filter_flow_pool()
         # filter_flow_pool_new()
         filter_flow_pool_new_with_level()
         # Filter fallback videos