Browse Source

add no-op-flag param test

liqian 3 years ago
parent
commit
cced366606
3 changed files with 123 additions and 173 deletions
  1. 2 0
      config.py
  2. 113 164
      recommend.py
  3. 8 9
      video_recall.py

+ 2 - 0
config.py

@@ -29,6 +29,7 @@ class BaseConfig(object):
         'rov_rank_appType_19': '027',
         'day_rule_rank1': '026',
         'day_rule_rank2': '030',
+        'ab_initial': '031',
     }
 
     # abTest
@@ -57,6 +58,7 @@ class BaseConfig(object):
             'day_rule_rank1': 40001,
             'day_rule_rank2': 40002,
         },  # 天级别规则更新rov列表实验
+        'ab_initial': 20000,  # ab实验相对对照组(无人工调整)
     }
 
     # 小程序小时级列表key不同实验标识

+ 113 - 164
recommend.py

@@ -136,7 +136,7 @@ def positon_duplicate(pos1_vids, pos2_vids, videos):
 
 
 def video_recommend(mid, uid, size, top_K, flow_pool_P, app_type, algo_type, client_info, expire_time=24*3600,
-                    ab_code=config_.AB_CODE['initial'], rule_key=''):
+                    ab_code=config_.AB_CODE['initial'], rule_key='', no_op_flag=False):
     """
     首页线上推荐逻辑
     :param mid: mid type-string
@@ -171,7 +171,7 @@ def video_recommend(mid, uid, size, top_K, flow_pool_P, app_type, algo_type, cli
     '''
     recall_result_list = []
     pool_recall = PoolRecall(app_type=app_type, mid=mid, uid=uid, ab_code=ab_code,
-                             client_info=client_info, rule_key=rule_key)
+                             client_info=client_info, rule_key=rule_key, no_op_flag=no_op_flag)
     _, last_rov_recall_key, _ = pool_recall.get_video_last_idx()
     if ab_code in [code for _, code in config_.AB_CODE['rank_by_h'].items()]:
         t = [gevent.spawn(pool_recall.rov_pool_recall_by_h, size, expire_time),
@@ -345,6 +345,113 @@ def update_local_distribute_count(videos):
         log_.error(traceback.format_exc())
 
 
+def get_recommend_params(ab_exp_info):
+    """Resolve the recommendation parameters for the caller's A/B experiment groups.
+
+    :param ab_exp_info: A/B experiment grouping info. When falsy, the defaults are
+        returned. Otherwise it is read as a mapping whose values are iterables of
+        dicts; each dict is queried for 'abExpCode' and 'configValue'.
+    :return: tuple ``(top_K, flow_pool_P, ab_code, rule_key, expire_time, no_op_flag)``.
+        ``expire_time`` is either ``3600`` or ``24 * 3600``; ``no_op_flag`` is True
+        for every matched experiment branch and False for the default branches.
+    """
+    top_K = config_.K
+    flow_pool_P = config_.P
+    # Flag meaning "do not fetch manually-intervened (operator) data"; defaults to off
+    no_op_flag = False
+    if not ab_exp_info:
+        # No experiment info at all: fall back to the initial configuration
+        ab_code = config_.AB_CODE['initial']
+        expire_time = 24 * 3600
+        rule_key = config_.RULE_KEY['initial']
+    else:
+        # Collect every experiment code (as a string) the caller is enrolled in
+        ab_exp_code_list = []
+        # NOTE(review): config_value_dict is only consumed by the commented-out
+        # experiment below; it is currently populated but never read
+        config_value_dict = {}
+        for _, item in ab_exp_info.items():
+            if not item:
+                continue
+            for ab_item in item:
+                ab_exp_code = ab_item.get('abExpCode', None)
+                if not ab_exp_code:
+                    continue
+                ab_exp_code_list.append(str(ab_exp_code))
+                config_value_dict[str(ab_exp_code)] = ab_item.get('configValue', None)
+        # Recommendation size 10 -> 4 experiment (disabled)
+        # if config_.AB_EXP_CODE['rec_size_home'] in ab_exp_code_list:
+        #     config_value = config_value_dict.get(config_.AB_EXP_CODE['rec_size_home'], None)
+        #     if config_value:
+        #         config_value = eval(str(config_value))
+        #     else:
+        #         config_value = {}
+        #     log_.info(f'config_value: {config_value}, type: {type(config_value)}')
+        #     size = int(config_value.get('size', 4))
+        #     top_K = int(config_value.get('K', 3))
+        #     flow_pool_P = float(config_value.get('P', 0.3))
+        # else:
+        #     size = size
+        #     top_K = config_.K
+        #     flow_pool_P = config_.P
+
+        # Relative control group for the algorithm experiments
+        if config_.AB_EXP_CODE['ab_initial'] in ab_exp_code_list:
+            ab_code = config_.AB_CODE['ab_initial']
+            expire_time = 24 * 3600
+            rule_key = config_.RULE_KEY['initial']
+            no_op_flag = True
+
+        # Hourly update - rule 1 experiment
+        elif config_.AB_EXP_CODE['rule_rank1'] in ab_exp_code_list:
+            ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank1')
+            expire_time = 3600
+            rule_key = config_.RULE_KEY['rule_rank1']
+            no_op_flag = True
+
+        # elif config_.AB_EXP_CODE['rule_rank2'] in ab_exp_code_list:
+        #     ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank2')
+        #     expire_time = 3600
+        #     rule_key = config_.RULE_KEY['rule_rank2']
+
+        elif config_.AB_EXP_CODE['rule_rank3'] in ab_exp_code_list:
+            ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank3')
+            expire_time = 3600
+            rule_key = config_.RULE_KEY['rule_rank3']
+            no_op_flag = True
+
+        # elif config_.AB_EXP_CODE['rule_rank4'] in ab_exp_code_list:
+        #     ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank4')
+        #     expire_time = 3600
+        #     rule_key = config_.RULE_KEY['rule_rank4']
+
+        # elif config_.AB_EXP_CODE['rule_rank5'] in ab_exp_code_list:
+        #     ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank5')
+        #     expire_time = 3600
+        #     rule_key = config_.RULE_KEY['rule_rank5']
+
+        elif config_.AB_EXP_CODE['day_rule_rank1'] in ab_exp_code_list:
+            ab_code = config_.AB_CODE['rank_by_day'].get('day_rule_rank1')
+            expire_time = 24 * 3600
+            rule_key = config_.RULE_KEY_DAY['day_rule_rank1']
+            no_op_flag = True
+
+        elif config_.AB_EXP_CODE['day_rule_rank2'] in ab_exp_code_list:
+            ab_code = config_.AB_CODE['rank_by_day'].get('day_rule_rank2')
+            expire_time = 24 * 3600
+            rule_key = config_.RULE_KEY_DAY['day_rule_rank2']
+            no_op_flag = True
+
+        else:
+            # Enrolled in experiments, but none of the ones handled above
+            ab_code = config_.AB_CODE['initial']
+            expire_time = 24 * 3600
+            rule_key = config_.RULE_KEY['initial']
+
+        # Homepage / related-recommendation logic update experiment
+        # (keys are named for appType 18 / 19).
+        # This is a separate if-chain evaluated after the one above: when it matches,
+        # it overrides ab_code / expire_time / flow_pool_P (and top_K for appType 19)
+        # while keeping the rule_key chosen above.
+        if config_.AB_EXP_CODE['rov_rank_appType_18_19'] in ab_exp_code_list:
+            ab_code = config_.AB_CODE['rov_rank_appType_18_19']
+            expire_time = 3600
+            flow_pool_P = config_.P_18_19
+            no_op_flag = True
+
+        elif config_.AB_EXP_CODE['rov_rank_appType_19'] in ab_exp_code_list:
+            ab_code = config_.AB_CODE['rov_rank_appType_19']
+            expire_time = 3600
+            top_K = 2
+            flow_pool_P = config_.P_18_19
+            no_op_flag = True
+
+    return top_K, flow_pool_P, ab_code, rule_key, expire_time, no_op_flag
+
+
 def video_homepage_recommend(mid, uid, size, app_type, algo_type, client_info, ab_exp_info):
     """
     首页线上推荐逻辑
@@ -393,92 +500,14 @@ def video_homepage_recommend(mid, uid, size, app_type, algo_type, client_info, a
                           top_K=top_K, expire_time=12 * 3600)
 
     else:
-        # size = size
-        top_K = config_.K
-        flow_pool_P = config_.P
-        if not ab_exp_info:
-            ab_code = config_.AB_CODE['initial']
-            expire_time = 24 * 3600
-            rule_key = config_.RULE_KEY['initial']
-        else:
-            ab_exp_code_list = []
-            config_value_dict = {}
-            for _, item in ab_exp_info.items():
-                if not item:
-                    continue
-                for ab_item in item:
-                    ab_exp_code = ab_item.get('abExpCode', None)
-                    if not ab_exp_code:
-                        continue
-                    ab_exp_code_list.append(str(ab_exp_code))
-                    config_value_dict[str(ab_exp_code)] = ab_item.get('configValue', None)
-            # 推荐条数 10->4 实验
-            # if config_.AB_EXP_CODE['rec_size_home'] in ab_exp_code_list:
-            #     config_value = config_value_dict.get(config_.AB_EXP_CODE['rec_size_home'], None)
-            #     if config_value:
-            #         config_value = eval(str(config_value))
-            #     else:
-            #         config_value = {}
-            #     log_.info(f'config_value: {config_value}, type: {type(config_value)}')
-            #     size = int(config_value.get('size', 4))
-            #     top_K = int(config_value.get('K', 3))
-            #     flow_pool_P = float(config_value.get('P', 0.3))
-            # else:
-            #     size = size
-            #     top_K = config_.K
-            #     flow_pool_P = config_.P
-
-            # 小时级更新-规则1 实验
-            if config_.AB_EXP_CODE['rule_rank1'] in ab_exp_code_list:
-                ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank1')
-                expire_time = 3600
-                rule_key = config_.RULE_KEY['rule_rank1']
-            # elif config_.AB_EXP_CODE['rule_rank2'] in ab_exp_code_list:
-            #     ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank2')
-            #     expire_time = 3600
-            #     rule_key = config_.RULE_KEY['rule_rank2']
-            elif config_.AB_EXP_CODE['rule_rank3'] in ab_exp_code_list:
-                ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank3')
-                expire_time = 3600
-                rule_key = config_.RULE_KEY['rule_rank3']
-            # elif config_.AB_EXP_CODE['rule_rank4'] in ab_exp_code_list:
-            #     ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank4')
-            #     expire_time = 3600
-            #     rule_key = config_.RULE_KEY['rule_rank4']
-            # elif config_.AB_EXP_CODE['rule_rank5'] in ab_exp_code_list:
-            #     ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank5')
-            #     expire_time = 3600
-            #     rule_key = config_.RULE_KEY['rule_rank5']
-            elif config_.AB_EXP_CODE['day_rule_rank1'] in ab_exp_code_list:
-                ab_code = config_.AB_CODE['rank_by_day'].get('day_rule_rank1')
-                expire_time = 24 * 3600
-                rule_key = config_.RULE_KEY_DAY['day_rule_rank1']
-            elif config_.AB_EXP_CODE['day_rule_rank2'] in ab_exp_code_list:
-                ab_code = config_.AB_CODE['rank_by_day'].get('day_rule_rank2')
-                expire_time = 24 * 3600
-                rule_key = config_.RULE_KEY_DAY['day_rule_rank2']
-            else:
-                ab_code = config_.AB_CODE['initial']
-                expire_time = 24 * 3600
-                rule_key = config_.RULE_KEY['initial']
-
-            # 老好看视频 / 票圈最惊奇 首页/相关推荐逻辑更新实验
-            if config_.AB_EXP_CODE['rov_rank_appType_18_19'] in ab_exp_code_list:
-                ab_code = config_.AB_CODE['rov_rank_appType_18_19']
-                expire_time = 3600
-                flow_pool_P = config_.P_18_19
-            elif config_.AB_EXP_CODE['rov_rank_appType_19'] in ab_exp_code_list:
-                ab_code = config_.AB_CODE['rov_rank_appType_19']
-                expire_time = 3600
-                top_K = 2
-                flow_pool_P = config_.P_18_19
+        top_K, flow_pool_P, ab_code, rule_key, expire_time, no_op_flag = get_recommend_params(ab_exp_info=ab_exp_info)
 
         # 简单召回 - 排序 - 兜底
         rank_result, last_rov_recall_key = video_recommend(mid=mid, uid=uid, app_type=app_type,
                                                            size=size, top_K=top_K, flow_pool_P=flow_pool_P,
                                                            algo_type=algo_type, client_info=client_info,
                                                            ab_code=ab_code, expire_time=expire_time,
-                                                           rule_key=rule_key)
+                                                           rule_key=rule_key, no_op_flag=no_op_flag)
         # ab-test
         # result = ab_test_op(rank_result=rank_result,
         #                     ab_code_list=[config_.AB_CODE['position_insert']],
@@ -501,94 +530,14 @@ def video_relevant_recommend(video_id, mid, uid, size, app_type, ab_exp_info):
     :param ab_exp_info: ab实验分组参数 [{"expItemId":1, "configValue":{"size":4, "K":3, ...}}, ...]
     :return: videos type-list
     """
-    # size = size
-    top_K = config_.K
-    flow_pool_P = config_.P
-    # 解析ab实验参数
-    if not ab_exp_info:
-        ab_code = config_.AB_CODE['initial']
-        expire_time = 24 * 3600
-        rule_key = config_.RULE_KEY['initial']
-    else:
-        ab_exp_code_list = []
-        config_value_dict = {}
-        for _, item in ab_exp_info.items():
-            if not item:
-                continue
-            for ab_item in item:
-                ab_exp_code = ab_item.get('abExpCode', None)
-                if not ab_exp_code:
-                    continue
-                ab_exp_code_list.append(str(ab_exp_code))
-                config_value_dict[str(ab_exp_code)] = ab_item.get('configValue', None)
-
-        # 推荐条数 10->4 实验
-        # if config_.AB_EXP_CODE['rec_size_relevant'] in ab_exp_code_list:
-        #     config_value = config_value_dict.get(config_.AB_EXP_CODE['rec_size_relevant'], None)
-        #     if config_value:
-        #         config_value = eval(str(config_value))
-        #     else:
-        #         config_value = {}
-        #     log_.info(f'config_value: {config_value}, type: {type(config_value)}')
-        #     size = int(config_value.get('size', 4))
-        #     top_K = int(config_value.get('K', 3))
-        #     flow_pool_P = float(config_value.get('P', 0.3))
-        # else:
-        #     size = size
-        #     top_K = config_.K
-        #     flow_pool_P = config_.P
-
-        # 小时级更新-规则1 实验
-        if config_.AB_EXP_CODE['rule_rank1'] in ab_exp_code_list:
-            ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank1')
-            expire_time = 3600
-            rule_key = config_.RULE_KEY['rule_rank1']
-        # elif config_.AB_EXP_CODE['rule_rank2'] in ab_exp_code_list:
-        #     ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank2')
-        #     expire_time = 3600
-        #     rule_key = config_.RULE_KEY['rule_rank2']
-        elif config_.AB_EXP_CODE['rule_rank3'] in ab_exp_code_list:
-            ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank3')
-            expire_time = 3600
-            rule_key = config_.RULE_KEY['rule_rank3']
-        # elif config_.AB_EXP_CODE['rule_rank4'] in ab_exp_code_list:
-        #     ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank4')
-        #     expire_time = 3600
-        #     rule_key = config_.RULE_KEY['rule_rank4']
-        # elif config_.AB_EXP_CODE['rule_rank5'] in ab_exp_code_list:
-        #     ab_code = config_.AB_CODE['rank_by_h'].get('rule_rank5')
-        #     expire_time = 3600
-        #     rule_key = config_.RULE_KEY['rule_rank5']
-        elif config_.AB_EXP_CODE['day_rule_rank1'] in ab_exp_code_list:
-            ab_code = config_.AB_CODE['rank_by_day'].get('day_rule_rank1')
-            expire_time = 24 * 3600
-            rule_key = config_.RULE_KEY_DAY['day_rule_rank1']
-        elif config_.AB_EXP_CODE['day_rule_rank2'] in ab_exp_code_list:
-            ab_code = config_.AB_CODE['rank_by_day'].get('day_rule_rank2')
-            expire_time = 24 * 3600
-            rule_key = config_.RULE_KEY_DAY['day_rule_rank2']
-        else:
-            ab_code = config_.AB_CODE['initial']
-            expire_time = 24 * 3600
-            rule_key = config_.RULE_KEY['initial']
-
-        # 老好看视频 / 票圈最惊奇 首页/相关推荐逻辑更新实验
-        if config_.AB_EXP_CODE['rov_rank_appType_18_19'] in ab_exp_code_list:
-            ab_code = config_.AB_CODE['rov_rank_appType_18_19']
-            expire_time = 3600
-            flow_pool_P = config_.P_18_19
-        elif config_.AB_EXP_CODE['rov_rank_appType_19'] in ab_exp_code_list:
-            ab_code = config_.AB_CODE['rov_rank_appType_19']
-            expire_time = 3600
-            top_K = 2
-            flow_pool_P = config_.P_18_19
+    top_K, flow_pool_P, ab_code, rule_key, expire_time, no_op_flag = get_recommend_params(ab_exp_info=ab_exp_info)
 
     # 简单召回 - 排序 - 兜底
     rank_result, last_rov_recall_key = video_recommend(mid=mid, uid=uid, app_type=app_type,
                                                        size=size, top_K=top_K, flow_pool_P=flow_pool_P,
                                                        algo_type='', client_info=None,
                                                        ab_code=ab_code, expire_time=expire_time,
-                                                       rule_key=rule_key)
+                                                       rule_key=rule_key, no_op_flag=no_op_flag)
     # ab-test
     # result = ab_test_op(rank_result=rank_result,
     #                     ab_code_list=[config_.AB_CODE['position_insert'], config_.AB_CODE['relevant_video_op']],

+ 8 - 9
video_recall.py

@@ -14,7 +14,7 @@ config_ = set_config()
 
 class PoolRecall(object):
     """召回"""
-    def __init__(self, app_type, client_info=None, mid='', uid='', ab_code='', rule_key=''):
+    def __init__(self, app_type, client_info=None, mid='', uid='', ab_code='', rule_key='', no_op_flag=False):
         """
         初始化
         :param app_type: 产品标识 type-int
@@ -29,6 +29,7 @@ class PoolRecall(object):
         self.ab_code = ab_code
         self.client_info = client_info
         self.rule_key = rule_key
+        self.no_op_flag = no_op_flag
         self.redis_helper = RedisHelper()
 
     def copy_redis_zset_data(self, from_key_name, to_key_name):
@@ -273,9 +274,7 @@ class PoolRecall(object):
         # log_.info('====== rov pool recall')
 
         # 获取生效中的置顶视频
-        # abCode = 30001   # 老好看视频 / 票圈最惊奇 首页/相关推荐逻辑更新实验
-        if self.ab_code in [config_.AB_CODE['rov_rank_appType_18_19'], config_.AB_CODE['rov_rank_appType_19']]:
-            # appType: [18, 19],不获取置顶视频
+        if self.no_op_flag:
             top_video_ids, top_video_result = [], []
         elif self.client_info is None:
             # 无用户位置信息时,不获取置顶视频
@@ -285,9 +284,7 @@ class PoolRecall(object):
             # log_.info('===top video result = {}'.format(top_video_ids))
 
         # 获取修改过rov的视频
-        # abCode = 30001   # 老好看视频 / 票圈最惊奇 首页/相关推荐逻辑更新实验
-        if self.ab_code in [config_.AB_CODE['rov_rank_appType_18_19'], config_.AB_CODE['rov_rank_appType_19']]:
-            # appType: [18, 19],不获取修改过rov的视频
+        if self.no_op_flag:
             update_rov_video_ids, update_rov_result = [], []
         else:
             update_rov_video_ids, update_rov_result = self.get_update_rov_videos()
@@ -303,9 +300,11 @@ class PoolRecall(object):
         rov_pool_key, last_rov_recall_key, idx = self.get_video_last_idx()
         if not rov_pool_key:
             log_.info('ROV召回池中无视频')
-            if not update_rov_result:
+            if (not update_rov_dup_result) and (not top_video_result):
                 return []
-            return update_rov_result
+            rov_pool_recall_result = top_video_result.extend(update_rov_dup_result)
+            rov_pool_recall_result.sort(key=lambda x: x.get('rovScore', 0), reverse=True)
+            return rov_pool_recall_result[:size]
         rov_pool_recall_result = []
         # 每次获取的视频数
         get_size = size * 5