liqian 3 года назад
Родитель
Коммит
8cb4f60231
4 изменённых файла: 119 добавлений и 47 удалений
  1. 7 3
      app.py
  2. 20 0
      config.py
  3. 75 31
      recommend.py
  4. 17 13
      video_rank.py

+ 7 - 3
app.py

@@ -39,10 +39,11 @@ def homepage_recommend():
         mid = request_data.get('mid')
         uid = request_data.get('uid')
         category_id = request_data.get('categoryId')
-        size = request_data.get('size')
+        size = request_data.get('size', 10)
         app_type = request_data.get('appType')
         algo_type = request_data.get('algoType')
         client_info = request_data.get('clientInfo')
+        ab_exp_info = request_data.get('abExpInfo', None)
         log_.info({'requestUri': '/applet/video/homepage/recommend', 'requestData': request_data})
         # log_.info('homepage_recommend request data: {}'.format(request_data))
         # size默认为10
@@ -51,7 +52,8 @@ def homepage_recommend():
         if category_id in config_.CATEGORY['recommend']:
             # 推荐
             videos = video_homepage_recommend(mid=mid, uid=uid, size=size, app_type=app_type,
-                                              algo_type=algo_type, client_info=client_info)
+                                              algo_type=algo_type, client_info=client_info,
+                                              ab_exp_info=ab_exp_info)
             result = {'code': 200, 'message': 'success', 'data': {'videos': videos}}
             log_.info({'requestUri': '/applet/video/homepage/recommend',
                        'app_type': app_type,
@@ -96,11 +98,13 @@ def relevant_recommend():
         page_num = request_data.get('pageNum', 1)
         page_size = request_data.get('pageSize', 10)
         app_type = request_data.get('appType')
+        ab_exp_info = request_data.get('abExpInfo', None)
         log_.info({'requestUri': '/applet/video/relevant/recommend', 'requestData': request_data})
         # log_.info('requestUri = "{}", requestData = "{}"'.format('/applet/video/relevant/recommend', request_data))
         # log_.info('relevant_recommend request data: {}'.format(request_data))
 
-        videos = video_relevant_recommend(video_id=video_id, mid=mid, uid=uid, size=page_size, app_type=app_type)
+        videos = video_relevant_recommend(video_id=video_id, mid=mid, uid=uid, size=page_size, app_type=app_type,
+                                          ab_exp_info=ab_exp_info)
 
         result = {'code': 200, 'message': 'success', 'data': {'videos': videos}}
         log_.info({'requestUri': '/applet/video/relevant/recommend',

+ 20 - 0
config.py

@@ -164,6 +164,11 @@ class DevelopmentConfig(BaseConfig):
         'PROJECT': 'rov-server-test',
     }
 
+    # AB实验配置
+    EXP_ITEM_ID = {
+        'rec_size': 22,
+    }
+
 
 class TestConfig(BaseConfig):
     """测试环境配置"""
@@ -213,6 +218,11 @@ class TestConfig(BaseConfig):
         'PROJECT': 'rov-server-test',
     }
 
+    # AB实验配置
+    EXP_ITEM_ID = {
+        'rec_size': 22,
+    }
+
 
 class PreProductionConfig(BaseConfig):
     """预发布环境配置"""
@@ -262,6 +272,11 @@ class PreProductionConfig(BaseConfig):
         'PROJECT': 'rov-server',
     }
 
+    # AB实验配置
+    EXP_ITEM_ID = {
+        'rec_size': -1,
+    }
+
 
 class ProductionConfig(BaseConfig):
     """生产环境配置"""
@@ -311,6 +326,11 @@ class ProductionConfig(BaseConfig):
         'PROJECT': 'rov-server',
     }
 
+    # AB实验配置
+    EXP_ITEM_ID = {
+        'rec_size': -1,
+    }
+
 
 def set_config():
     # 获取环境变量 ROV_SERVER_ENV

+ 75 - 31
recommend.py

@@ -135,13 +135,15 @@ def positon_duplicate(pos1_vids, pos2_vids, videos):
     return l
 
 
-def video_recommend(mid, uid, size, app_type, algo_type, client_info, expire_time=24*3600,
+def video_recommend(mid, uid, size, top_K, flow_pool_P, app_type, algo_type, client_info, expire_time=24*3600,
                     ab_code=config_.AB_CODE['initial']):
     """
     首页线上推荐逻辑
     :param mid: mid type-string
     :param uid: uid type-string
     :param size: 请求视频数量 type-int
+    :param top_K: 保证topK为召回池视频 type-int
+    :param flow_pool_P: size-top_K视频为流量池视频的概率 type-float
     :param app_type: 产品标识  type-int
     :param algo_type: 算法类型  type-string
     :param client_info: 用户位置信息 {"country": "国家",  "province": "省份",  "city": "城市"}
@@ -186,7 +188,7 @@ def video_recommend(mid, uid, size, app_type, algo_type, client_info, expire_tim
         'rov_pool_recall': recall_result_list[0],
         'flow_pool_recall': recall_result_list[1]
     }
-    rank_result = video_rank(data=data, size=size)
+    rank_result = video_rank(data=data, size=size, top_K=top_K, flow_pool_P=flow_pool_P)
     end_rank = time.time()
     log_.info('mid: {}, uid: {}, rank_result: {}, execute time = {}ms'.format(
         mid, uid, rank_result, (end_rank - start_rank) * 1000))
@@ -216,10 +218,10 @@ def ab_test_op(rank_result, ab_code_list, app_type, mid, uid, **kwargs):
     """
     # ####### 视频宽高比AB实验
     # 对内容精选进行 视频宽高比分发实验
-    if config_.AB_CODE['w_h_rate'] in ab_code_list and app_type in config_.AB_TEST.get('w_h_rate', []):
-        rank_result = video_rank_by_w_h_rate(videos=rank_result)
-        log_.info('app_type: {}, mid: {}, uid: {}, rank_by_w_h_rate_result: {}'.format(
-            app_type, mid, uid, rank_result))
+    # if config_.AB_CODE['w_h_rate'] in ab_code_list and app_type in config_.AB_TEST.get('w_h_rate', []):
+    #     rank_result = video_rank_by_w_h_rate(videos=rank_result)
+    #     log_.info('app_type: {}, mid: {}, uid: {}, rank_by_w_h_rate_result: {}'.format(
+    #         app_type, mid, uid, rank_result))
 
     # 按position位置排序
     if config_.AB_CODE['position_insert'] in ab_code_list and app_type in config_.AB_TEST.get('position_insert', []):
@@ -230,26 +232,27 @@ def ab_test_op(rank_result, ab_code_list, app_type, mid, uid, **kwargs):
             app_type, mid, uid, rank_result))
 
     # 相关推荐强插
-    if config_.AB_CODE['relevant_video_op'] in ab_code_list \
-            and app_type in config_.AB_TEST.get('relevant_video_op', []):
-        head_vid = kwargs['head_vid']
-        size = kwargs['size']
-        rank_result = relevant_video_top_recommend(
-            app_type=app_type, mid=mid, uid=uid, head_vid=head_vid, videos=rank_result, size=size
-        )
-        log_.info('app_type: {}, mid: {}, uid: {}, head_vid: {}, rank_by_relevant_video_op_result: {}'.format(
-            app_type, mid, uid, head_vid, rank_result))
+    # if config_.AB_CODE['relevant_video_op'] in ab_code_list \
+    #         and app_type in config_.AB_TEST.get('relevant_video_op', []):
+    #     head_vid = kwargs['head_vid']
+    #     size = kwargs['size']
+    #     rank_result = relevant_video_top_recommend(
+    #         app_type=app_type, mid=mid, uid=uid, head_vid=head_vid, videos=rank_result, size=size
+    #     )
+    #     log_.info('app_type: {}, mid: {}, uid: {}, head_vid: {}, rank_by_relevant_video_op_result: {}'.format(
+    #         app_type, mid, uid, head_vid, rank_result))
 
     return rank_result
 
 
-def update_redis_data(result, app_type, mid, last_rov_recall_key, expire_time=24*3600):
+def update_redis_data(result, app_type, mid, last_rov_recall_key, top_K, expire_time=24*3600):
     """
     根据最终的排序结果更新相关redis数据
     :param result: 排序结果
     :param app_type: 产品标识
     :param mid: mid
     :param last_rov_recall_key: 用户上一次在rov召回池对应的位置 redis key
+    :param top_K: 保证topK为召回池视频 type-int
     :param expire_time: 末位视频记录redis过期时间
     :return: None
     """
@@ -265,8 +268,8 @@ def update_redis_data(result, app_type, mid, last_rov_recall_key, expire_time=24
             redis_helper.add_data_with_set(key_name=preview_key_name, values=tuple(preview_video_ids), expire_time=30 * 60)
             log_.info('preview redis update success!')
 
-        # 将此次获取的ROV召回池config_.K末位视频id同步刷新到Redis中,方便下次快速定位到召回位置,过期时间为1天
-        rov_recall_video = [item['videoId'] for item in result[:config_.K]
+        # 将此次获取的ROV召回池top_K末位视频id同步刷新到Redis中,方便下次快速定位到召回位置,过期时间为1天
+        rov_recall_video = [item['videoId'] for item in result[:top_K]
                             if item['pushFrom'] == config_.PUSH_FROM['rov_recall']]
         if len(rov_recall_video) > 0:
             if app_type == config_.APP_TYPE['APP']:
@@ -315,7 +318,7 @@ def update_local_distribute_count(videos):
         log_.error(traceback.format_exc())
 
 
-def video_homepage_recommend(mid, uid, size, app_type, algo_type, client_info):
+def video_homepage_recommend(mid, uid, size, app_type, algo_type, client_info, ab_exp_info):
     """
     首页线上推荐逻辑
     :param mid: mid type-string
@@ -324,8 +327,10 @@ def video_homepage_recommend(mid, uid, size, app_type, algo_type, client_info):
     :param app_type: 产品标识  type-int
     :param algo_type: 算法类型  type-string
     :param client_info: 用户位置信息 {"country": "国家",  "province": "省份",  "city": "城市"}
+    :param ab_exp_info: ab实验分组参数 [{"expItemId":1, "configValue":{"size":4, "K":3, ...}}, ...]
     :return:
     """
+
     # 对 vlog 切换10%的流量做实验
     # 对mid进行哈希
     # hash_mid = hashlib.md5(mid.encode('utf-8')).hexdigest()
@@ -345,8 +350,11 @@ def video_homepage_recommend(mid, uid, size, app_type, algo_type, client_info):
 
     if app_type == config_.APP_TYPE['APP']:
         # 票圈视频APP
+        top_K = config_.K
+        flow_pool_P = config_.P
         # 简单召回 - 排序 - 兜底
-        rank_result, last_rov_recall_key = video_recommend(mid=mid, uid=uid, size=size, app_type=app_type,
+        rank_result, last_rov_recall_key = video_recommend(mid=mid, uid=uid, app_type=app_type,
+                                                           size=size, top_K=top_K, flow_pool_P=flow_pool_P,
                                                            algo_type=algo_type, client_info=client_info,
                                                            expire_time=12 * 3600)
         # ab-test
@@ -355,22 +363,42 @@ def video_homepage_recommend(mid, uid, size, app_type, algo_type, client_info):
                             app_type=app_type, mid=mid, uid=uid)
         # redis数据刷新
         update_redis_data(result=result, app_type=app_type, mid=mid, last_rov_recall_key=last_rov_recall_key,
-                          expire_time=12 * 3600)
+                          top_K=top_K, expire_time=12 * 3600)
+
     else:
+
+        if ab_exp_info is None:
+            size = size
+            top_K = config_.K
+            flow_pool_P = config_.P
+        else:
+            exp_item_id_list = [item.get('expItemId') for item in ab_exp_info]
+            if config_.EXP_ITEM_ID['rec_size'] in exp_item_id_list:
+                config_value = ab_exp_info[exp_item_id_list.index(config_.EXP_ITEM_ID['rec_size'])].get('configValue')
+                size = config_value.get('size', 4)
+                top_K = config_value.get('K', 3)
+                flow_pool_P = config_value.get('P', 0.3)
+            else:
+                size = size
+                top_K = config_.K
+                flow_pool_P = config_.P
+
         # 简单召回 - 排序 - 兜底
-        rank_result, last_rov_recall_key = video_recommend(mid=mid, uid=uid, size=size, app_type=app_type,
+        rank_result, last_rov_recall_key = video_recommend(mid=mid, uid=uid, app_type=app_type,
+                                                           size=size, top_K=top_K, flow_pool_P=flow_pool_P,
                                                            algo_type=algo_type, client_info=client_info)
         # ab-test
         result = ab_test_op(rank_result=rank_result,
                             ab_code_list=[config_.AB_CODE['position_insert']],
                             app_type=app_type, mid=mid, uid=uid)
         # redis数据刷新
-        update_redis_data(result=result, app_type=app_type, mid=mid, last_rov_recall_key=last_rov_recall_key)
+        update_redis_data(result=result, app_type=app_type, mid=mid, last_rov_recall_key=last_rov_recall_key,
+                          top_K=top_K)
 
     return result
 
 
-def video_relevant_recommend(video_id, mid, uid, size, app_type):
+def video_relevant_recommend(video_id, mid, uid, size, app_type, ab_exp_info):
     """
     相关推荐逻辑
     :param video_id: 相关推荐的头部视频id
@@ -378,21 +406,37 @@ def video_relevant_recommend(video_id, mid, uid, size, app_type):
     :param uid: uid type-string
     :param size: 请求视频数量 type-int
     :param app_type: 产品标识  type-int
+    :param ab_exp_info: ab实验分组参数 [{"expItemId":1, "configValue":{"size":4, "K":3, ...}}, ...]
     :return: videos type-list
     """
-    # videos = video_recommend(mid=mid, uid=uid, size=size, app_type=app_type, algo_type='', client_info=None)
+
+    if ab_exp_info is None:
+        size = size
+        top_K = config_.K
+        flow_pool_P = config_.P
+    else:
+        exp_item_id_list = [item.get('expItemId') for item in ab_exp_info]
+        if config_.EXP_ITEM_ID['rec_size'] in exp_item_id_list:
+            config_value = ab_exp_info[exp_item_id_list.index(config_.EXP_ITEM_ID['rec_size'])].get('configValue')
+            size = config_value.get('size', 4)
+            top_K = config_value.get('K', 3)
+            flow_pool_P = config_value.get('P', 0.3)
+        else:
+            size = size
+            top_K = config_.K
+            flow_pool_P = config_.P
+
     # 简单召回 - 排序 - 兜底
-    rank_result, last_rov_recall_key = video_recommend(mid=mid, uid=uid, size=size, app_type=app_type,
+    rank_result, last_rov_recall_key = video_recommend(mid=mid, uid=uid, app_type=app_type,
+                                                       size=size, top_K=top_K, flow_pool_P=flow_pool_P,
                                                        algo_type='', client_info=None)
     # ab-test
     result = ab_test_op(rank_result=rank_result,
-                        ab_code_list=[
-                            config_.AB_CODE['position_insert'],
-                            config_.AB_CODE['relevant_video_op']
-                        ],
+                        ab_code_list=[config_.AB_CODE['position_insert'], config_.AB_CODE['relevant_video_op']],
                         app_type=app_type, mid=mid, uid=uid, head_vid=video_id, size=size)
     # redis数据刷新
-    update_redis_data(result=result, app_type=app_type, mid=mid, last_rov_recall_key=last_rov_recall_key)
+    update_redis_data(result=result, app_type=app_type, mid=mid, last_rov_recall_key=last_rov_recall_key,
+                      top_K=top_K)
 
     return result
 

+ 17 - 13
video_rank.py

@@ -11,11 +11,13 @@ log_ = Log()
 config_ = set_config()
 
 
-def video_rank(data, size):
+def video_rank(data, size, top_K, flow_pool_P):
     """
     视频分发排序
     :param data: 各路召回的视频 type-dict {'rov_pool_recall': [], 'flow_pool_recall': []}
     :param size: 请求数
+    :param top_K: 保证topK为召回池视频 type-int
+    :param flow_pool_P: size-top_K视频为流量池视频的概率 type-float
     :return: rank_result
     """
     if not data['rov_pool_recall'] and not data['flow_pool_recall']:
@@ -26,59 +28,61 @@ def video_rank(data, size):
     # 流量池
     flow_recall_rank = sorted(data['flow_pool_recall'], key=lambda k: (k.get('rovScore'), 0), reverse=True)
     # 对各路召回的视频进行去重
-    rov_recall_rank, flow_recall_rank = remove_duplicate(rov_recall=rov_recall_rank, flow_recall=flow_recall_rank)
+    rov_recall_rank, flow_recall_rank = remove_duplicate(rov_recall=rov_recall_rank, flow_recall=flow_recall_rank,
+                                                         top_K=top_K)
     # log_.info('remove_duplicate finished! rov_recall_rank = {}, flow_recall_rank = {}'.format(
     #     rov_recall_rank, flow_recall_rank))
     # 从ROV召回池中获取top k
     if len(rov_recall_rank) > 0:
-        rank_result = rov_recall_rank[:config_.K]
-        rov_recall_rank = rov_recall_rank[config_.K:]
+        rank_result = rov_recall_rank[:top_K]
+        rov_recall_rank = rov_recall_rank[top_K:]
     else:
-        rank_result = flow_recall_rank[:config_.K]
-        flow_recall_rank = flow_recall_rank[config_.K:]
+        rank_result = flow_recall_rank[:top_K]
+        flow_recall_rank = flow_recall_rank[top_K:]
 
     # 按概率 p 及score排序获取 size - k 个视频
     i = 0
-    while i < size - config_.K:
+    while i < size - top_K:
         # 随机生成[0, 1)浮点数
         rand = random.random()
         # log_.info('rand: {}'.format(rand))
-        if rand < config_.P:
+        if rand < flow_pool_P:
             if flow_recall_rank:
                 rank_result.append(flow_recall_rank[0])
                 flow_recall_rank.remove(flow_recall_rank[0])
             else:
-                rank_result.extend(rov_recall_rank[:size - config_.K - i])
+                rank_result.extend(rov_recall_rank[:size - top_K - i])
                 return rank_result
         else:
             if rov_recall_rank:
                 rank_result.append(rov_recall_rank[0])
                 rov_recall_rank.remove(rov_recall_rank[0])
             else:
-                rank_result.extend(flow_recall_rank[:size - config_.K - i])
+                rank_result.extend(flow_recall_rank[:size - top_K - i])
                 return rank_result
         i += 1
     return rank_result
 
 
-def remove_duplicate(rov_recall, flow_recall):
+def remove_duplicate(rov_recall, flow_recall, top_K):
     """
     对多路召回的视频去重
     去重原则:
         如果视频在ROV召回池topK,则保留ROV召回池,否则保留流量池
     :param rov_recall: ROV召回池-已排序
     :param flow_recall: 流量池-已排序
+    :param top_K: 保证topK为召回池视频 type-int
     :return:
     """
     flow_recall_result = []
     rov_recall_remove = []
     flow_recall_video_ids = [item['videoId'] for item in flow_recall]
     # rov_recall topK
-    for item in rov_recall[:config_.K]:
+    for item in rov_recall[:top_K]:
         if item['videoId'] in flow_recall_video_ids:
             flow_recall_video_ids.remove(item['videoId'])
     # other
-    for item in rov_recall[config_.K:]:
+    for item in rov_recall[top_K:]:
         if item['videoId'] in flow_recall_video_ids:
             rov_recall_remove.append(item)