Pārlūkot izejas kodu

add old-video-recall ab-test

liqian 3 gadi atpakaļ
vecāks
revīzija
61c00b6cfe
4 mainīti faili ar 105 papildinājumiem un 7 dzēšanām
  1. 6 0
      config.py
  2. 34 7
      recommend.py
  3. 40 0
      video_rank.py
  4. 25 0
      video_recall.py

+ 6 - 0
config.py

@@ -30,6 +30,7 @@ class BaseConfig(object):
         'day_rule_rank1': '026',
         'day_rule_rank2': '030',
         'ab_initial': '031',
+        'old_video': '034',
     }
 
     # abTest
@@ -59,6 +60,7 @@ class BaseConfig(object):
             'day_rule_rank2': 40002,
         },  # 天级别规则更新rov列表实验
         'ab_initial': 20000,  # ab实验相对实验组(无人工调整)
+        'old_video': 50001,  # 固定位置插入老视频
     }
 
     # 小程序小时级列表key不同实验标识
@@ -89,6 +91,7 @@ class BaseConfig(object):
         'relevant_video_op': 'relevant_video_op',  # 相关推荐强插
         'rov_recall_h': 'recall_pool_h',  # 小时级更新列表
         'rov_recall_day': 'recall_pool_day',  # 天级规则更新列表
+        'old_video': 'old_video_recall',  # 老视频
     }
 
     # category id mapping
@@ -138,6 +141,9 @@ class BaseConfig(object):
     # appType:[18, 19]小程序离线ROV模型结果存放 redis key前缀,完整格式:com.weiqu.video.recall.hot.item.score.{appType}.{date}.{h}
     RECALL_KEY_NAME_PREFIX_APP_TYPE = 'com.weiqu.video.recall.hot.item.score.'
 
+    # 小程序老视频更新结果存放 redis key 前缀,完整格式:'com.weiqu.video.recall.old.item.{date}'
+    RECALL_KEY_NAME_PREFIX_OLD_VIDEOS = 'com.weiqu.video.recall.old.item.'
+
     # appType = 6, ROV召回池redis key前缀,完整格式:com.weiqu.video.recall.hot.apptype.h.item.score.6.{h}
     # RECALL_KEY_NAME_PREFIX_APP_TYPE = 'com.weiqu.video.recall.hot.apptype.h.item.score.6.'
 

+ 34 - 7
recommend.py

@@ -10,7 +10,7 @@ import config
 from log import Log
 from config import set_config
 from video_recall import PoolRecall
-from video_rank import video_rank, bottom_strategy, video_rank_by_w_h_rate
+from video_rank import video_rank, bottom_strategy, video_rank_by_w_h_rate, video_rank_with_old_video
 from db_helper import RedisHelper
 import gevent
 from utils import FilterVideos
@@ -136,7 +136,7 @@ def positon_duplicate(pos1_vids, pos2_vids, videos):
 
 
 def video_recommend(mid, uid, size, top_K, flow_pool_P, app_type, algo_type, client_info, expire_time=24*3600,
-                    ab_code=config_.AB_CODE['initial'], rule_key='', no_op_flag=False):
+                    ab_code=config_.AB_CODE['initial'], rule_key='', no_op_flag=False, old_video_index=-1):
     """
     首页线上推荐逻辑
     :param mid: mid type-string
@@ -173,18 +173,26 @@ def video_recommend(mid, uid, size, top_K, flow_pool_P, app_type, algo_type, cli
     pool_recall = PoolRecall(app_type=app_type, mid=mid, uid=uid, ab_code=ab_code,
                              client_info=client_info, rule_key=rule_key, no_op_flag=no_op_flag)
     _, last_rov_recall_key, _ = pool_recall.get_video_last_idx()
+    # 小时级实验
     if ab_code in [code for _, code in config_.AB_CODE['rank_by_h'].items()]:
         t = [gevent.spawn(pool_recall.rov_pool_recall_by_h, size, expire_time),
              gevent.spawn(pool_recall.flow_pool_recall, size)]
+    # 最惊奇/老好看实验
     elif app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
         # if ab_code == config_.AB_CODE['rov_rank_appType_18_19']:
         t = [gevent.spawn(pool_recall.rov_pool_recall, size, expire_time),
              gevent.spawn(pool_recall.flow_pool_recall_18_19, size)]
         # else:
         #     t = [gevent.spawn(pool_recall.rov_pool_recall, size, expire_time)]
+    # 天级实验
     elif ab_code in [code for _, code in config_.AB_CODE['rank_by_day'].items()]:
         t = [gevent.spawn(pool_recall.rov_pool_recall_by_day, size, expire_time),
              gevent.spawn(pool_recall.flow_pool_recall, size)]
+    # 老视频实验
+    elif ab_code in [config_.AB_CODE['old_video']]:
+        t = [gevent.spawn(pool_recall.rov_pool_recall, size, expire_time),
+             gevent.spawn(pool_recall.flow_pool_recall, size),
+             gevent.spawn(pool_recall.old_videos_recall, size)]
     else:
         t = [gevent.spawn(pool_recall.rov_pool_recall, size, expire_time),
              gevent.spawn(pool_recall.flow_pool_recall, size)]
@@ -215,6 +223,12 @@ def video_recommend(mid, uid, size, top_K, flow_pool_P, app_type, algo_type, cli
             'flow_pool_recall': recall_result_list[1]
         }
     rank_result = video_rank(data=data, size=size, top_K=top_K, flow_pool_P=flow_pool_P)
+
+    # 老视频实验
+    if ab_code in [config_.AB_CODE['old_video']]:
+        rank_result = video_rank_with_old_video(rank_result=rank_result, old_video_recall=recall_result_list[2],
+                                                size=size, top_K=top_K, old_video_index=old_video_index)
+
     end_rank = time.time()
     log_.info('mid: {}, uid: {}, rank_result: {}, execute time = {}ms'.format(
         mid, uid, rank_result, (end_rank - start_rank) * 1000))
@@ -355,6 +369,7 @@ def get_recommend_params(ab_exp_info):
         ab_code = config_.AB_CODE['initial']
         expire_time = 24 * 3600
         rule_key = config_.RULE_KEY['initial']
+        old_video_index = -1
     else:
         ab_exp_code_list = []
         config_value_dict = {}
@@ -449,7 +464,15 @@ def get_recommend_params(ab_exp_info):
             flow_pool_P = config_.P_18_19
             no_op_flag = True
 
-    return top_K, flow_pool_P, ab_code, rule_key, expire_time, no_op_flag
+        # 老视频实验
+        if config_.AB_EXP_CODE['old_video'] in ab_exp_code_list:
+            ab_code = config_.AB_CODE['old_video']
+            no_op_flag = True
+            old_video_index = 2
+        else:
+            old_video_index = -1
+
+    return top_K, flow_pool_P, ab_code, rule_key, expire_time, no_op_flag, old_video_index
 
 
 def video_homepage_recommend(mid, uid, size, app_type, algo_type, client_info, ab_exp_info):
@@ -500,14 +523,16 @@ def video_homepage_recommend(mid, uid, size, app_type, algo_type, client_info, a
                           top_K=top_K, expire_time=12 * 3600)
 
     else:
-        top_K, flow_pool_P, ab_code, rule_key, expire_time, no_op_flag = get_recommend_params(ab_exp_info=ab_exp_info)
+        top_K, flow_pool_P, ab_code, rule_key, expire_time, no_op_flag, old_video_index = \
+            get_recommend_params(ab_exp_info=ab_exp_info)
 
         # 简单召回 - 排序 - 兜底
         rank_result, last_rov_recall_key = video_recommend(mid=mid, uid=uid, app_type=app_type,
                                                            size=size, top_K=top_K, flow_pool_P=flow_pool_P,
                                                            algo_type=algo_type, client_info=client_info,
                                                            ab_code=ab_code, expire_time=expire_time,
-                                                           rule_key=rule_key, no_op_flag=no_op_flag)
+                                                           rule_key=rule_key, no_op_flag=no_op_flag,
+                                                           old_video_index=old_video_index)
         # ab-test
         # result = ab_test_op(rank_result=rank_result,
         #                     ab_code_list=[config_.AB_CODE['position_insert']],
@@ -530,14 +555,16 @@ def video_relevant_recommend(video_id, mid, uid, size, app_type, ab_exp_info):
     :param ab_exp_info: ab实验分组参数 [{"expItemId":1, "configValue":{"size":4, "K":3, ...}}, ...]
     :return: videos type-list
     """
-    top_K, flow_pool_P, ab_code, rule_key, expire_time, no_op_flag = get_recommend_params(ab_exp_info=ab_exp_info)
+    top_K, flow_pool_P, ab_code, rule_key, expire_time, no_op_flag, old_video_index = \
+        get_recommend_params(ab_exp_info=ab_exp_info)
 
     # 简单召回 - 排序 - 兜底
     rank_result, last_rov_recall_key = video_recommend(mid=mid, uid=uid, app_type=app_type,
                                                        size=size, top_K=top_K, flow_pool_P=flow_pool_P,
                                                        algo_type='', client_info=None,
                                                        ab_code=ab_code, expire_time=expire_time,
-                                                       rule_key=rule_key, no_op_flag=no_op_flag)
+                                                       rule_key=rule_key, no_op_flag=no_op_flag,
+                                                       old_video_index=old_video_index)
     # ab-test
     # result = ab_test_op(rank_result=rank_result,
     #                     ab_code_list=[config_.AB_CODE['position_insert'], config_.AB_CODE['relevant_video_op']],

+ 40 - 0
video_rank.py

@@ -269,6 +269,46 @@ def video_rank_by_w_h_rate(videos):
     return new_rank_result
 
 
+def video_rank_with_old_video(rank_result, old_video_recall, size, top_K, old_video_index=2):
+    """
+    视频分发排序 - 包含老视频, 老视频插入固定位置
+    :param rank_result: 排序后的结果
+    :param size: 请求数
+    :param old_video_index: 老视频插入的位置索引,默认为2
+    :return: new_rank_result
+    """
+    if not old_video_recall:
+        return rank_result
+    # 视频去重
+    rank_video_ids = [item['videoId'] for item in rank_result]
+    old_video_remove = []
+    for old_video in old_video_recall:
+        if old_video['videoId'] in rank_video_ids:
+            old_video_remove.append(old_video)
+    for item in old_video_remove:
+        old_video_recall.remove(item)
+
+    if not old_video_recall:
+        return rank_result
+
+    # 插入老视频
+    # 随机获取一个视频
+    ind = random.randint(0, len(old_video_recall) - 1)
+    old_video = old_video_recall[ind]
+    # 插入
+    if len(rank_result) < top_K:
+        new_rank_result = rank_result + [old_video]
+    else:
+        new_rank_result = rank_result[:old_video_index] + [old_video] + rank_result[old_video_index:]
+        if len(new_rank_result) > size:
+            # 判断后两位视频来源
+            push_from_1 = new_rank_result[-1]['pushFrom']
+            push_from_2 = new_rank_result[-2]['pushFrom']
+            if push_from_2 == config_.PUSH_FROM['rov_recall'] and push_from_1 == config_.PUSH_FROM['flow_recall']:
+                new_rank_result = new_rank_result[:-2] + new_rank_result[-1:]
+    return new_rank_result[:size]
+
+
 if __name__ == '__main__':
     d_test = [{'videoId': 10028734, 'rovScore': 99.977, 'pushFrom': 'recall_pool', 'abCode': 10000},
               {'videoId': 1919925, 'rovScore': 99.974, 'pushFrom': 'recall_pool', 'abCode': 10000},

+ 25 - 0
video_recall.py

@@ -1,5 +1,6 @@
 import time
 import traceback
+import random
 
 from datetime import date, timedelta, datetime
 from log import Log
@@ -909,3 +910,27 @@ class PoolRecall(object):
         else:
             idx = 0
         return rule_key_name, last_rule_day_recall_key, idx
+
+    def old_videos_recall(self, size):
+        """老视频召回"""
+        # 获取老视频
+        now_dt = datetime.strftime(datetime.today(), '%Y%m%d')
+        key_name = f'{config_.RECALL_KEY_NAME_PREFIX_OLD_VIDEOS}{now_dt}'
+        old_videos = self.redis_helper.get_data_from_set(key_name=key_name)
+        if not old_videos:
+            return []
+        # 过滤
+        old_video_ids = [int(video_id) for video_id in old_videos]
+        filter_ = FilterVideos(app_type=self.app_type, video_ids=old_video_ids, mid=self.mid, uid=self.uid)
+        ge = gevent.spawn(filter_.filter_videos)
+        ge.join()
+        filtered_videos = ge.get()
+        if not filtered_videos:
+            return []
+        # 添加视频源参数 pushFrom, abCode
+        old_video_result = [{'videoId': int(item), 'rovScore': 0,
+                             'pushFrom': config_.PUSH_FROM['old_video'], 'abCode': self.ab_code}
+                            for item in filtered_videos]
+        # 随机抽取 size+1 条数据
+        random.shuffle(old_video_result)
+        return old_video_result[:size+1]