Jelajahi Sumber

Merge branch 'feature_2023080810_liqian_add_ad_abtest' into dev

liqian 1 tahun lalu
induk
komit
fbdb45b98d
5 file diubah dengan 290 penambahan dan 262 penghapusan
  1. 80 1
      config.py
  2. 58 39
      rank_service.py
  3. 89 133
      recommend.py
  4. 44 55
      video_rank.py
  5. 19 34
      video_recall.py

+ 80 - 1
config.py

@@ -165,6 +165,9 @@ class BaseConfig(object):
             'abtest_394': 60066,
             'abtest_395': 60067,
             'abtest_396': 60068,
+            'abtest_407': 60069,
+            'abtest_408': 60070,
+            'abtest_409': 60071,
         },  # 地域分组小时级规则实验
 
         'rank_by_24h': {
@@ -424,7 +427,19 @@ class BaseConfig(object):
         '396': {
             'data_key': 'data10', 'rule_key': 'rule7',
             'ab_code': AB_CODE['region_rank_by_h'].get('abtest_396')
-        }
+        },
+        '407': {
+            'data_key': 'data10', 'rule_key': 'rule7',
+            'ab_code': AB_CODE['region_rank_by_h'].get('abtest_407')
+        },
+        '408': {
+            'data_key': 'data10', 'rule_key': 'rule7',
+            'ab_code': AB_CODE['region_rank_by_h'].get('abtest_408')
+        },
+        '409': {
+            'data_key': 'data10', 'rule_key': 'rule7',
+            'ab_code': AB_CODE['region_rank_by_h'].get('abtest_409')
+        },
     }
 
     # APP ab实验配置
@@ -988,6 +1003,14 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group', 'return0share1mids'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + [else, return0share1mids]非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '190-h': {
+            'video': {'data': 'videos21out'},
+            'user': {'data': 'user21out', 'rule': 'rule2'},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
 
         # 票圈视频
         # '194-a': {
@@ -1050,6 +1073,14 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group', 'return0share1mids'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + [else, return0share1mids]非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '194-j': {
+            'video': {'data': 'videos4out'},
+            'user': {'data': 'user4out', 'rule': 'rule2'},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
 
         # 内容精选
         # '195-a': {
@@ -1162,6 +1193,14 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group', 'return0share1mids'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + [else, return0share1mids]非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '196-h': {
+            'video': {'data': 'videos6out'},
+            'user': {'data': 'user6out', 'rule': 'rule2'},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
 
         # 老好看视频
         # '197-a': {
@@ -1210,6 +1249,14 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group', 'return0share1mids'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + [else, return0share1mids]非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '197-h': {
+            'video': {'data': 'videos18out'},
+            'user': {'data': 'user18out', 'rule': 'rule2'},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
 
         # 票圈最惊奇
         # '198-a': {
@@ -1258,6 +1305,14 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group', 'return0share1mids'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + [else, return0share1mids]非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '198-h': {
+            'video': {'data': 'videos19out'},
+            'user': {'data': 'user19out', 'rule': 'rule2'},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
 
         # 票圈足迹
         # '242-a': {
@@ -1294,6 +1349,14 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group', 'return0share1mids'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + [else, return0share1mids]非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '242-f': {
+            'video': {'data': 'videos22out'},
+            'user': {'data': 'user22out', 'rule': 'rule2'},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
 
         # 票圈福年
         # '243-a': {
@@ -1330,6 +1393,14 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group', 'return0share1mids'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + [else, return0share1mids]非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '243-f': {
+            'video': {'data': 'videos3out'},
+            'user': {'data': 'user3out', 'rule': 'rule2'},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
 
         # 票圈|信仰之路
         '324-a': {
@@ -1348,6 +1419,14 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group', 'return0share1mids'],
         },  # vlog端所有广告类型数据 + 优化阈值计算方式 + [else, return0share1mids]非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '324-c': {
+            'video': {'data': 'videos0out'},
+            'user': {'data': 'user0out', 'rule': 'rule2'},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+        },  # vlog端所有广告类型数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
 
     }
 

+ 58 - 39
rank_service.py

@@ -9,8 +9,6 @@ log_ = Log()
 
 config_ = set_config()
 def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
-    recall_list = []
-    vidKeys = []
     feature_dict = {}
     # default value
     apptype = 4
@@ -21,6 +19,9 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     machineinfo_platform = 'android'
     sencetype = 100078
     machineinfo_model ='M2006C3LC'
+    city_code = -1
+    province_code = -1
+    relevant_video_id = -1
     if env_dict and len(env_dict)>0:
         apptype = env_dict.get('app_type',4)
         pagesource = env_dict.get('pagesource', '')
@@ -30,10 +31,25 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
         recommendsource = env_dict.get('recommendsource', '0')
         machineinfo_platform = env_dict.get('machineinfo_platform', '')
         sencetype = env_dict.get('sencetype', '')
-    redisObj = RedisHelper()
-    pre_str = "v_ctr:"
-    hour_pre_str = "v_hour_ctr:"
-    hour_vidKeys = []
+        city_code = env_dict.get('city_code', -1)
+        province_code = env_dict.get('province_code', -1)
+        relevant_video_id = env_dict.get('relevant_video_id', -1)
+        if city_code and city_code == "":
+            city_code = -1
+        if province_code and province_code == "":
+            province_code = -1
+        try:
+            city_code = int(city_code)
+        except:
+            city_code = -1
+        try:
+            province_code = int(province_code)
+        except:
+            province_code = -1
+        try:
+            relevant_video_id = int(relevant_video_id)
+        except:
+            relevant_video_id = -1
     mid_list = []
     videoid_list = []
     apptype_list = []
@@ -41,7 +57,6 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     versioncode_list = []
     machineinfo_brand_list = []
     machineinfo_model_list = []
-    recommendsource_list = []
     machineinfo_platform_list = []
     sencetype_list = []
     day_rov_list = []
@@ -56,33 +71,33 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     day_view_pv_list = []
     day_view_users_list = []
     day_share_users = []
-    for recall_item in data['rov_pool_recall']:
-        if len(recall_item)<=0:
-            continue
-        vid = recall_item.get("videoId",0)
-        mid_list.append(mid)
-        videoid_list.append(int(vid))
-        apptype_list.append(apptype)
-        pagesource_list.append(pagesource)
-        versioncode_list.append(versioncode)
-        machineinfo_brand_list.append(machineinfo_brand)
-        machineinfo_model_list.append(machineinfo_model)
-        recommendsource_list.append(recommendsource)
-        machineinfo_platform_list.append(machineinfo_platform)
-        sencetype_list.append(sencetype)
-        vidKeys.append(pre_str + str(vid))
-        hour_vidKeys.append(hour_pre_str+str(vid))
-        recall_list.append(recall_item)
-    #print("vidKeys:", vidKeys)
-    video_static_info = redisObj.get_batch_key(vidKeys)
-    video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
+    recommendsource_list = []
+    relevant_video_list = []
+    recall_list = env_dict.get('recall_list', [])
+    city_list = []
+    province_list = []
+    if recall_list and len(recall_list)>0:
+        for i in range(len(recall_list)):
+            mid_list.append(mid)
+            videoid_list.append(int(recall_list[i]))
+            apptype_list.append(apptype)
+            pagesource_list.append(pagesource)
+            versioncode_list.append(versioncode)
+            machineinfo_brand_list.append(machineinfo_brand)
+            machineinfo_model_list.append(machineinfo_model)
+            recommendsource_list.append(recommendsource)
+            machineinfo_platform_list.append(machineinfo_platform)
+            sencetype_list.append(sencetype)
+            city_list.append(city_code)
+            province_list.append(province_code)
+            relevant_video_list.append(relevant_video_id)
+    video_static_info = env_dict.get('vid_day_fea_list', [])
+    video_hour_static_info = env_dict.get('vid_hour_fea_list', [])
     #print("video_static_info:",video_static_info)
-    if video_static_info:
+    if video_static_info and len(video_static_info)>0:
         for i in range(len(video_static_info)):
             try:
-                # print(video_scores[i])
-                vid = vidKeys[i].replace(pre_str,"")
-                if video_static_info[i] :
+                if video_static_info[i]  and len(video_static_info[i])>0:
                     per_video_staic = json.loads(video_static_info[i])
                     day_rov_list.append(float(per_video_staic[0]))
                     day_share_return_score_list.append(float(per_video_staic[1]))
@@ -118,12 +133,10 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     hour_return_rate_list = []
     hour_ctr_score_list = []
 
-    if video_hour_static_info:
+    if video_hour_static_info and len(video_hour_static_info)>0:
         for i in range(len(video_hour_static_info)):
             try:
-                # print(video_scores[i])
-                vid = hour_vidKeys[i].replace(hour_pre_str,"")
-                if video_hour_static_info[i] :
+                if video_hour_static_info[i] and len(video_hour_static_info[i])>0:
                     per_hour_video_staic = json.loads(video_hour_static_info[i])
                     hour_rov_list.append(float(per_hour_video_staic[0]))
                     hour_share_return_score_list.append(float(per_hour_video_staic[1]))
@@ -177,7 +190,10 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     feature_dict["hour_share_score_list"] = hour_share_score_list
     feature_dict["hour_return_rate_list"] = hour_return_rate_list
     feature_dict["hour_ctr_score_list"] = hour_ctr_score_list
-    return feature_dict, recall_list
+    feature_dict["city_code"] = city_list
+    feature_dict["province_code"] = province_list
+    feature_dict["relevant_video_id"] = relevant_video_list
+    return feature_dict
 
 
 def insert_static_default_fea(day_ctr_score_list, day_play_pv_list, day_play_users_list, day_return_rate_list,
@@ -224,9 +240,12 @@ def get_tf_serving_sores(feature_dict):
                     # "day_view_users":feature_dict["day_view_users_list"],
                     "hour_rov": feature_dict["hour_rov_list"],
                     "hour_share_score": feature_dict["hour_share_score_list"],
-                    "hour_share_return_score": feature_dict["hour_share_return_score_list"],
-                    "hour_return_rate": feature_dict["hour_return_rate_list"],
-                    "hour_ctr_score": feature_dict["hour_ctr_score_list"]
+                    #"hour_share_return_score": feature_dict["hour_share_return_score_list"],
+                    #"hour_return_rate": feature_dict["hour_return_rate_list"],
+                    #"hour_ctr_score": feature_dict["hour_ctr_score_list"],
+                    "city_code": feature_dict['city_code'],
+                    "province_code": feature_dict['province_code'],
+                    "relevant_video_id":feature_dict['relevant_video_id']
     }
     request_data_dict= {}
     request_data_dict["inputs"] = inputs_data

+ 89 - 133
recommend.py

@@ -215,33 +215,33 @@ def video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type, al
         exp_config = pool_recall.get_video_recall_config()
     #print("exp_config:", exp_config)
     if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
-        t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time, ab_code, exp_config)]
+        t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time)]
         if ab_code==60058:
-            t.append(gevent.spawn(pool_recall.get_U2I_reall, mid, exp_config))
-            t.append(gevent.spawn(pool_recall.get_play_reall, mid, exp_config))
+            t.append(gevent.spawn(pool_recall.get_U2I_reall, mid))
+            t.append(gevent.spawn(pool_recall.get_play_reall, mid))
         elif  ab_code==60059:
-            t.append(gevent.spawn(pool_recall.get_word2vec_item_reall, exp_config))
+            t.append(gevent.spawn(pool_recall.get_word2vec_item_reall))
         elif  ab_code==60061 or ab_code==60063:
-            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter, exp_config))
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
         elif  ab_code==60062:
-            t.append(gevent.spawn(pool_recall.get_U2U2I_reall, mid, exp_config))
+            t.append(gevent.spawn(pool_recall.get_U2U2I_reall, mid))
         elif  ab_code==60064:
-            t.append(gevent.spawn(pool_recall.get_return_video_reall, exp_config))
+            t.append(gevent.spawn(pool_recall.get_return_video_reall))
     else:
-        t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time, ab_code, exp_config),
+        t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time),
              gevent.spawn(pool_recall.flow_pool_recall, size, config_.QUICK_FLOW_POOL_ID),
              gevent.spawn(pool_recall.flow_pool_recall, size)]
         if ab_code==60058:
-            t.append(gevent.spawn(pool_recall.get_U2I_reall, mid, exp_config))
-            t.append(gevent.spawn(pool_recall.get_play_reall, mid, exp_config))
+            t.append(gevent.spawn(pool_recall.get_U2I_reall, mid))
+            t.append(gevent.spawn(pool_recall.get_play_reall, mid))
         elif ab_code == 60059:
-            t.append(gevent.spawn(pool_recall.get_word2vec_item_reall, exp_config))
+            t.append(gevent.spawn(pool_recall.get_word2vec_item_reall))
         elif ab_code == 60061 or ab_code==60063:
-            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter, exp_config))
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
         elif ab_code == 60062:
-            t.append(gevent.spawn(pool_recall.get_U2U2I_reall, mid, exp_config))
+            t.append(gevent.spawn(pool_recall.get_U2U2I_reall, mid))
         elif  ab_code==60064:                                                                         
-            t.append(gevent.spawn(pool_recall.get_return_video_reall, exp_config))
+            t.append(gevent.spawn(pool_recall.get_return_video_reall))
     # 最惊奇相关推荐实验
     # elif ab_code == config_.AB_CODE['top_video_relevant_appType_19']:
     #     t = [gevent.spawn(pool_recall.relevant_recall_19, video_id, size, expire_time),
@@ -346,14 +346,14 @@ def video_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type, al
     #else:
     #print("data['hot_recall']", data['hot_recall'])
     # 60058: u2itag, 60059:word2vec, 60061: sim_recall, 60062: u2u2i
-    if ab_code == 60058 or ab_code == 60059 or ab_code == 60060 or ab_code == 60061 \
-        or ab_code == 60062 or ab_code== 60063 or ab_code == 60064:
-        rank_result, flow_num = video_sank_pos_rank(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P), ab_Code=ab_code, exp_config=exp_config)
-        result['flow_num'] = flow_num
-        if rank_result:
-            result['rank_num'] = len(rank_result)
-    else:
-        rank_result = video_rank(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P))
+    # if ab_code == 60058 or ab_code == 60059 or ab_code == 60060 or ab_code == 60061 \
+    #     or ab_code == 60062 or ab_code== 60063 or ab_code == 60064:
+    #     rank_result, flow_num = video_sank_pos_rank(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P), ab_Code=ab_code, exp_config=exp_config)
+    #     result['flow_num'] = flow_num
+    #     if rank_result:
+    #         result['rank_num'] = len(rank_result)
+    # else:
+    rank_result = video_rank(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P))
 
     # 老视频实验
     # if ab_code in [config_.AB_CODE['old_video']]:
@@ -441,51 +441,36 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
                              client_info=client_info, rule_key=rule_key, data_key=data_key, no_op_flag=no_op_flag,
                              params=params, rule_key_30day=rule_key_30day, shield_config=shield_config, video_id= video_id)
 
-    exp_config = None
-    if ab_code == 60052:
-        exp_config = pool_recall.get_flow_exp_7_config()
-    elif ab_code == 60053:
-        exp_config = pool_recall.get_flow_exp_8_config()
-    elif ab_code == 60057:
-        exp_config = pool_recall.get_flow_exp_6_config()
-    #print("exp_config:", exp_config)
-
+    exp_config = pool_recall.get_sort_ab_codel_config()
+    # 60054 全量: simrecall+融合排序
     if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
-        t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time, ab_code, exp_config)]
+        t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time)]
         if ab_code ==60054 or ab_code==60066:
-            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter, exp_config))
-        if ab_code == 60055:
-            t.append(gevent.spawn(pool_recall.get_return_video_reall, exp_config))
-        if ab_code == 60056:
-            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter, exp_config))
-            t.append(gevent.spawn(pool_recall.get_U2I_reall, mid , exp_config))
-        if ab_code == 60065 :
-            t.append(gevent.spawn(pool_recall.get_return_video_reall, exp_config, 'rv2:'))
-        if ab_code ==60067 :
-            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter, exp_config))
-            t.append(gevent.spawn(pool_recall.get_return_video_reall, exp_config))
-        if ab_code == 60068:
-            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter, exp_config))
-            t.append(gevent.spawn(pool_recall.get_return_video_reall, exp_config, 'rv2:'))
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
+        if ab_code == 60056 or ab_code == 60071:
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
+            t.append(gevent.spawn(pool_recall.get_U2I_reall, mid))
+        if ab_code ==60067  or ab_code == 60069:
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
+            t.append(gevent.spawn(pool_recall.get_return_video_reall))
+        if ab_code == 60068 or ab_code == 60070:
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
+            t.append(gevent.spawn(pool_recall.get_return_video_reall, 'rv2:'))
     else:
-        t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time, ab_code, exp_config),
+        t = [gevent.spawn(pool_recall.rov_pool_recall_with_region, size, expire_time),
              gevent.spawn(pool_recall.flow_pool_recall, size, config_.QUICK_FLOW_POOL_ID),
              gevent.spawn(pool_recall.flow_pool_recall, size)]
         if  ab_code == 60054 or ab_code==60066:
             t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
-        if ab_code == 60055:
-            t.append(gevent.spawn(pool_recall.get_return_video_reall, exp_config))
-        if ab_code == 60056:
-            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter, exp_config))
-            t.append(gevent.spawn(pool_recall.get_U2I_reall, mid , exp_config))
-        if ab_code == 60065:
-            t.append(gevent.spawn(pool_recall.get_return_video_reall, exp_config, 'rv2:'))
-        if ab_code == 60067:
-            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter, exp_config))
-            t.append(gevent.spawn(pool_recall.get_return_video_reall, exp_config))
-        if ab_code == 60068:
-            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter, exp_config))
-            t.append(gevent.spawn(pool_recall.get_return_video_reall, exp_config, 'rv2:'))
+        if ab_code == 60056 or ab_code == 60071:
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
+            t.append(gevent.spawn(pool_recall.get_U2I_reall, mid))
+        if ab_code == 60067 or ab_code == 60069:
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
+            t.append(gevent.spawn(pool_recall.get_return_video_reall))
+        if ab_code == 60068 or ab_code == 60070:
+            t.append(gevent.spawn(pool_recall.get_sim_hot_item_reall_filter))
+            t.append(gevent.spawn(pool_recall.get_return_video_reall, 'rv2:'))
 
     gevent.joinall(t)
     recall_result_list = [i.get() for i in t]
@@ -495,7 +480,7 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
         result['recallResult']= []
         result['rankResult'] = []
         return result
-    #1. merge simrecall
+    #1. merge simrecall or  deepfm
     if ab_code == 60054 or ab_code==60066:
         rov_pool_recall = []
         if len(recall_result_list) >= 2:
@@ -522,35 +507,8 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
                         now_video_ids.add(video_id)
             if len(rov_pool_recall) > 0:
                 recall_result_list[0] = rov_pool_recall
-    # merge return video
-    if ab_code == 60055 or ab_code==60065:
-        rov_pool_recall = []
-        if len(recall_result_list)>=2:
-            region_recall = recall_result_list[0]
-            return_video_reall = []
-            if app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
-                return_video_reall, = recall_result_list[1]
-            else:
-                if len(recall_result_list)>=4:
-                    return_video_reall = recall_result_list[3]
-            #print("sim:",sim_recall)
-            now_video_ids = set('')
-            if len(region_recall)>0:
-                for video in region_recall:
-                    video_id = video.get('videoId')
-                    if video_id not in now_video_ids:
-                        rov_pool_recall.append(video)
-                        now_video_ids.add(video_id)
-            if len(return_video_reall)>0:
-                for video in return_video_reall:
-                    video_id = video.get('videoId')
-                    if video_id not in now_video_ids:
-                        rov_pool_recall.append(video)
-                        now_video_ids.add(video_id)
-            if len(rov_pool_recall)>0:
-                recall_result_list[0] = rov_pool_recall
-    # merge simrecall, merge u2i title recall
-    if ab_code == 60056:
+    # merge simrecall, merge u2i title recall, deepfm
+    if ab_code == 60056 or ab_code == 60071:
         rov_pool_recall = []
         if len(recall_result_list)>=2:
             region_recall = recall_result_list[0]
@@ -565,6 +523,7 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
                     sim_recall = recall_result_list[3]
                 if len(recall_result_list) >= 5:
                     u2i_title_recall = recall_result_list[4]
+            #print("u2i_title_recall:", u2i_title_recall)
             now_video_ids = set('')
             if len(region_recall)>0:
                 for video in region_recall:
@@ -589,7 +548,7 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
                 recall_result_list[0] = rov_pool_recall
 
     #2. merge simrecall, return video
-    if ab_code == 60067 or ab_code==60068:
+    if ab_code == 60067 or ab_code==60068 or ab_code==60069 or ab_code==60070:
         rov_pool_recall = []
         if len(recall_result_list)>=2:
             region_recall = recall_result_list[0]
@@ -665,40 +624,46 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
                 'flow_pool_recall': recall_result_list[2]
             }
     # 3. 特征回流
-    #print(env_dict)
+    rec_recall_list = []
+    vidKeys = []
+    hour_vidKeys = []
+    pre_str = "v_ctr:"
+    pre_hour_str = "v_hour_ctr:"
+    rec_recall_item_list = []
+    for recall_item in data['rov_pool_recall']:
+        if len(recall_item) <= 0:
+            continue
+        vid = recall_item.get("videoId", 0)
+        rec_recall_list.append(vid)
+        vidKeys.append(pre_str + str(vid))
+        hour_vidKeys.append(pre_hour_str + str(vid))
+        rec_recall_item_list.append(recall_item)
+    redisObj = RedisHelper()
+    video_static_info = redisObj.get_batch_key(vidKeys)
+    video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
+    vid_day_fea_list = []
+    vid_hour_fea_list = []
+    if video_static_info:
+        vid_day_fea_list = video_static_info
+    if video_hour_static_info:
+        vid_hour_fea_list = video_hour_static_info
     if env_dict:
-        province_code = client_info.get('provinceCode', '-1')
-        city_code = client_info.get('cityCode', '-1')
+        province_code = client_info.get('provinceCode', -1)
+        if province_code and province_code == "":
+            province_code =-1
+        city_code = client_info.get('cityCode', -1)
+        if city_code and city_code == "":
+            city_code = -1
         env_dict['mid'] = mid
         env_dict['province_code'] = province_code
         env_dict['city_code'] = city_code
-        rec_recall_list = []
-        vidKeys = []
-        hour_vidKeys = []
-        pre_str = "v_ctr:"
-        pre_hour_str = "v_hour_ctr:"
-        for recall_item in data['rov_pool_recall']:
-            if len(recall_item) <= 0:
-                continue
-            vid = recall_item.get("videoId", 0)
-            rec_recall_list.append(vid)
-            vidKeys.append(pre_str + str(vid))
-            hour_vidKeys.append(pre_hour_str + str(vid))
-        redisObj = RedisHelper()
-        video_static_info = redisObj.get_batch_key(vidKeys)
-        video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
-        vid_day_fea_list = []
-        vid_hour_fea_list = []
-        if video_static_info:
-            vid_day_fea_list = video_static_info
-        if video_hour_static_info:
-            vid_hour_fea_list = video_hour_static_info
+
         env_dict['recall_list'] = rec_recall_list
         env_dict['vid_day_fea_list'] = vid_day_fea_list
         env_dict['vid_hour_fea_list'] = vid_hour_fea_list
         env_json = env_dict
     #4.
-    rank_result, flow_num  = video_new_rank2(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P), ab_code=ab_code, mid=mid, exp_config=exp_config, env_dict=env_dict)
+    rank_result, flow_num  = video_new_rank2(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P), ab_code=ab_code, mid=mid, exp_config=exp_config, env_dict=env_dict, rec_recall_item_list=rec_recall_item_list)
     #print(rank_result)
     if rank_result:
         result['rank_num'] = len(rank_result)
@@ -1719,14 +1684,13 @@ def video_homepage_recommend(request_id, mid, uid, size, app_type, algo_type,
                              rule_key=rule_key, data_key=data_key,
                              no_op_flag=no_op_flag, old_video_index=old_video_index,
                              params=params, rule_key_30day=rule_key_30day, shield_config=shield_config)
-    # 60052,60053,60057(test7, test8, test6) 融合排序
-    # simrecal: 60054 +融合
+    # simrecal: 60054 +融合, 全量
     # return video, return video2
     # old video: 60056, test2
-    elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 \
-            or ab_code == 60055 or ab_code == 60056 or ab_code==60057  \
-            or ab_code==60065 or ab_code == 60067 or ab_code==60068 \
-            or ab_code==60066:
+    elif  ab_code == 60054 \
+             or ab_code == 60056 \
+             or ab_code == 60067 or ab_code==60068 \
+            or ab_code==60066 or ab_code == 60069 or ab_code == 60070 or ab_code == 60071:
         result, fea_info = video_old_recommend(request_id=request_id,
                                      mid=mid, uid=uid, app_type=app_type,
                                      size=size, top_K=top_K, flow_pool_P=flow_pool_P,
@@ -1769,9 +1733,6 @@ def video_homepage_recommend(request_id, mid, uid, size, app_type, algo_type,
     update_redis_st = time.time()
     if ab_code == 60047 or  ab_code == 60048 or  ab_code == 60049:
         update_flow_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
-    # elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 \
-    #         or ab_code == 60056 or ab_code==60057:
-    #     update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
     else:
         update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
     # log_.info({
@@ -1844,9 +1805,9 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
                                  rule_key=rule_key, data_key=data_key, no_op_flag=no_op_flag,
                                  old_video_index=old_video_index, video_id=video_id,
                                  params=params, rule_key_30day=rule_key_30day, shield_config=shield_config)
-    elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 or \
-            ab_code == 60056 or ab_code==60057 or ab_code ==60065 or ab_code ==60067 \
-            or ab_code ==60068 or ab_code==60066:
+    elif   ab_code == 60054  or \
+            ab_code == 60056 or ab_code ==60067 \
+            or ab_code ==60068 or ab_code==60066 or ab_code == 60069 or ab_code == 60070 or ab_code == 60071:
         result, fea_info = video_old_recommend(request_id=request_id,
                                  mid=mid, uid=uid, app_type=app_type,
                                  size=size, top_K=top_K, flow_pool_P=flow_pool_P,
@@ -1890,11 +1851,6 @@ def video_relevant_recommend(request_id, video_id, mid, uid, size, app_type, ab_
     update_redis_st = time.time()
     if ab_code == 60047 or ab_code == 60048 or  ab_code == 60049:
          update_flow_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
-    # elif ab_code == 60050 or ab_code == 60051:
-    #      update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
-    # elif ab_code == 60052 or ab_code == 60053 or ab_code == 60054 or ab_code == 60055 or \
-    #         ab_code == 60056 or ab_code==60057 or ab_code ==60065:
-    #      update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
     else:
          update_redis_data(result=rank_result, app_type=app_type, mid=mid, top_K=top_K)
     # log_.info({

+ 44 - 55
video_rank.py

@@ -593,7 +593,7 @@ def video_rank_with_old_video(rank_result, old_video_recall, size, top_K, old_vi
     return new_rank_result[:size]
 
 
-def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=None, env_dict=None):
+def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=None, env_dict=None, rec_recall_item_list=None):
     """
         视频分发排序
         :param data: 各路召回的视频 type-dict {'rov_pool_recall': [], 'flow_pool_recall': []}
@@ -602,65 +602,66 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=Non
         :param flow_pool_P: size-top_K视频为流量池视频的概率 type-float
         :return: rank_result
         """
-    if not data['rov_pool_recall'] and not data['flow_pool_recall']:
+    if not rec_recall_item_list and not data['flow_pool_recall']:
         return [], 0
 
-
     #全量的是vlog,票圈精选, 334,60057,
     # 60054: simrecall,
-    # 60052: 票圈精选,融合排序,60053:空置
-    # 60055: video_reall, 60065: video_recall2
-    # 60056: get_U2I_reall
     pre_str = "k_p2:"
-    if ab_code==60057:
-        pre_str = "k_p2:"
-    elif ab_code == 60052:
-        pre_str = "k_p5:"
-    elif ab_code == 60053:
-        pre_str = "k_p8:"
-    elif ab_code == 60054:
-        pre_str = "k_p3:"
-    elif ab_code == 60055:
-        pre_str = "k_p4:"
-    elif ab_code == 60056:
-        pre_str = "k_p7:"
     #print("pre_str:", pre_str)
-    recall_list = []
-    rov_recall_rank = data['rov_pool_recall']
+    rov_recall_rank = rec_recall_item_list
     #call rank service
     #flag_call_service = 0
-    if ab_code == 60066:
-        feature_dict, recall_list = get_featurs(mid, data, size, top_K, flow_pool_P, env_dict)
+    sort_index = 0
+    if exp_config and "sort_flag" in exp_config:
+        sort_index = exp_config["sort_flag"]
+    #print("sort_index:", sort_index)
+    redisObj = RedisHelper()
+    vidKeys = []
+    for recall_item in rec_recall_item_list:
+        vid = recall_item.get("videoId", 0)
+        vidKeys.append(pre_str + str(vid))
+    video_scores = redisObj.get_batch_key(vidKeys)
+    if ab_code == 60066 or ab_code == 60069 or ab_code == 60070 or ab_code == 60071:
+        feature_dict = get_featurs(mid, data, size, top_K, flow_pool_P, env_dict)
         score_result = get_tf_serving_sores(feature_dict)
-        if score_result and len(score_result) > 0 and len(score_result) == len(recall_list):
+        if video_scores and len(video_scores)>0  and rec_recall_item_list and score_result and len(score_result) > 0\
+                and len(score_result) == len(rec_recall_item_list) and len(video_scores)== len(score_result):
             for i in range(len(score_result)):
-                recall_list[i]['sort_score'] = score_result[i][0]
-                recall_list[i]['flag_call_service'] = 1
-            rov_recall_rank = sorted(recall_list, key=lambda k: k.get('sort_score', 0), reverse=True)
+                try:
+                    if video_scores[i] is None and len(score_result[i])>0:
+                        return_score = 0.000000001
+                        total_score = return_score * score_result[i][0]
+                        rec_recall_item_list[i]['sort_score'] = total_score
+                    else:
+                        video_score_str = json.loads(video_scores[i])
+                        if len(video_score_str)>= sort_index and  len(video_score_str)>0:
+                            return_score = video_score_str[sort_index]
+                        else:
+                            return_score = 0.000000001
+                        total_score = return_score * score_result[i][0]
+                        rec_recall_item_list[i]['sort_score'] = total_score
+                except Exception:
+                    return_score = 0.000000001
+                    total_score = return_score * 0.00000001
+                    rec_recall_item_list[i]['sort_score'] = total_score
+                rec_recall_item_list[i]['flag_call_service'] = 1
+            rov_recall_rank = sorted(rec_recall_item_list, key=lambda k: k.get('sort_score', 0), reverse=True)
         else:
-            rov_recall_rank = sup_rank(data, pre_str, recall_list, rov_recall_rank)
+            rov_recall_rank = sup_rank(video_scores, rec_recall_item_list)
     else:
-        redisObj = RedisHelper()
-        vidKeys = []
-        for recall_item in data['rov_pool_recall']:
-            if len(recall_item) <= 0:
-                continue
-            vid = recall_item.get("videoId", 0)
-            vidKeys.append(pre_str + str(vid))
-            recall_list.append(recall_item)
-        video_scores = redisObj.get_batch_key(vidKeys)
-        if video_scores and len(recall_list) > 0:
+        if video_scores and len(rec_recall_item_list) > 0 and len(video_scores)>0:
             for i in range(len(video_scores)):
                 try:
                     if video_scores[i] is None:
-                        recall_list[i]['sort_score'] = 0.0
+                        rec_recall_item_list[i]['sort_score'] = 0.0
                     else:
                         video_score_str = json.loads(video_scores[i])
                         # print("video_score_str:", video_score_str)
-                        recall_list[i]['sort_score'] = video_score_str[0]
+                        rec_recall_item_list[i]['sort_score'] = video_score_str[0]
                 except Exception:
-                    recall_list[i]['sort_score'] = 0.0
-            rov_recall_rank = sorted(recall_list, key=lambda k: k.get('sort_score', 0), reverse=True)
+                    rec_recall_item_list[i]['sort_score'] = 0.0
+            rov_recall_rank = sorted(rec_recall_item_list, key=lambda k: k.get('sort_score', 0), reverse=True)
     #print(rov_recall_rank)
     flow_recall_rank = sorted(data['flow_pool_recall'], key=lambda k: k.get('rovScore', 0), reverse=True)
     rov_recall_rank, flow_recall_rank = remove_duplicate(rov_recall=rov_recall_rank, flow_recall=flow_recall_rank,
@@ -678,8 +679,7 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=Non
         # 按概率 p 及score排序获取 size - k 个视频
     flow_num = 0
     flowConfig = 0
-    if exp_config and exp_config['flowConfig']:
-        flowConfig = exp_config['flowConfig']
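+    # The flow-pool control below used to be switched on through the experiment config; flowConfig is now fixed at 0, so the flowConfig == 1 branch is kept untouched but is not taken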
     if flowConfig == 1 and len(rov_recall_rank) > 0:
         for recall_item in rank_result:
             flow_recall_name = recall_item.get("flowPool", '')
@@ -734,18 +734,7 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=Non
 
 
 # Fallback ranking for when the ranking-service result cannot be used
-def sup_rank(data, pre_str, recall_list, rov_recall_rank):
-    redisObj = RedisHelper()
-    vidKeys = []
-    for recall_item in data['rov_pool_recall']:
-        if len(recall_item) <= 0:
-            continue
-        vid = recall_item.get("videoId", 0)
-        vidKeys.append(pre_str + str(vid))
-        recall_list.append(recall_item)
-    video_scores = redisObj.get_batch_key(vidKeys)
-    #print("vidKeys:", video_scores, "\t", vidKeys)
-    #print(len(video_scores), len(recall_list))
+def sup_rank(video_scores, recall_list):
     if video_scores and len(recall_list) > 0:
         for i in range(len(video_scores)):
             try:

+ 19 - 34
video_recall.py

@@ -1210,7 +1210,7 @@ class PoolRecall(object):
             idx = 0
         return key_name, last_region_dup_key, idx
 
-    def rov_pool_recall_with_region_process(self, size=4, expire_time=24*3600, ab_code=None, exp_config=None):
+    def rov_pool_recall_with_region_process(self, size=4, expire_time=24*3600):
         """
         地域分组召回视频
         :param size: 获取视频个数
@@ -1275,16 +1275,16 @@ class PoolRecall(object):
         now_video_ids = []
         recall_result = []
         recall_num = size
-        if ab_code and exp_config:
-            if ab_code==60058 or ab_code==60059 or ab_code == 60060 \
-                    or ab_code == 60061 or ab_code==60052 \
-                    or ab_code==60053 or ab_code==60057 :
-                try:
-                    recall_num = int(exp_config['recall_num'])
-                except:
-                    recall_num = size
-        if recall_num<size:
-            recall_num = size
+        # if ab_code and exp_config:
+        #     if ab_code==60058 or ab_code==60059 or ab_code == 60060 \
+        #             or ab_code == 60061 or ab_code==60052 \
+        #             or ab_code==60053 or ab_code==60057 :
+        #         try:
+        #             recall_num = int(exp_config['recall_num'])
+        #         except:
+        #             recall_num = size
+        # if recall_num<size:
+        #     recall_num = size
         for region_result in region_recall_result_list:
             for video in region_result:
                 video_id = video.get('videoId')
@@ -1323,10 +1323,10 @@ class PoolRecall(object):
         #print("recall_result:", recall_result[:recall_num])
         return recall_result[:recall_num]
 
-    def rov_pool_recall_with_region(self, size=4, expire_time=24*3600, ab_code=None, exp_config=None):
+    def rov_pool_recall_with_region(self, size=4, expire_time=24*3600):
         """召回池召回视频"""
         # 获取召回池中视频
-        videos = self.rov_pool_recall_with_region_process(size=size, expire_time=expire_time, ab_code=ab_code, exp_config=exp_config)
+        videos = self.rov_pool_recall_with_region_process(size=size, expire_time=expire_time)
         # 对在流量池中存在的视频添加标记字段
         result = []
         for item in videos:
@@ -2137,7 +2137,7 @@ class PoolRecall(object):
                 )
         return recall_result[:200]
 
-    def get_sim_hot_item_reall_filter(self, exp_config=None):
+    def get_sim_hot_item_reall_filter(self):
         if self.video_id is None:
             return  []
         recall_key = "sim_hot_" + str(self.video_id)
@@ -2163,11 +2163,6 @@ class PoolRecall(object):
         if len(video_ids)<=0:
             return  recall_result
         recall_num = 20
-        try:
-            if exp_config and exp_config['recall_get_num']:
-                recall_num = int(exp_config['recall_get_num'])
-        except:
-            recall_num = 20
         #print("recall_num:", recall_num)
         video_ids = video_ids[:recall_num]
         #print(video_ids)
@@ -2698,9 +2693,9 @@ class PoolRecall(object):
         else:
             return None
 
-    def get_flow_exp_6_config(self):
-        recall_key = "exp6_exp_config"
-        data = self.redis_helper.get_data_from_redis(key_name=recall_key)
+    def get_sort_ab_codel_config(self):
+        ab_key = "sort_ab_config"
+        data = self.redis_helper.get_data_from_redis(key_name=ab_key)
         if data is not None:
             try:
                 # print(data)
@@ -2713,7 +2708,7 @@ class PoolRecall(object):
             return None
 
 
-    def get_U2I_reall(self, mid, exp_config=None):
+    def get_U2I_reall(self, mid):
         #recall_key = "hot_video:"
         if not mid:
             return  []
@@ -2739,11 +2734,6 @@ class PoolRecall(object):
         if len(video_ids)<=0:
             return  recall_result
         recall_num = 20
-        try:
-            if exp_config and exp_config['recall_get_num']:
-                recall_num = int(exp_config['recall_get_num'])
-        except:
-            recall_num = 20
         #print("recall_num:", recall_num)
         video_ids = video_ids[:recall_num]
         #print(video_ids)
@@ -2820,7 +2810,7 @@ class PoolRecall(object):
         else:
             return None
 
-    def get_return_video_reall(self, exp_config=None, pre_key=None):
+    def get_return_video_reall(self, pre_key=None):
         if self.video_id is None:
             return  []
         recall_key = "rv:"+ str(self.video_id)
@@ -2847,11 +2837,6 @@ class PoolRecall(object):
         if len(video_ids)<=0:
             return  recall_result
         recall_num = 20
-        try:
-            if exp_config and exp_config['recall_get_num']:
-                recall_num = int(exp_config['recall_get_num'])
-        except:
-            recall_num = 20
         #print("recall_num:", recall_num)
         video_ids = video_ids[:recall_num]
         #print(video_ids)