Переглянути джерело

Merge branch 'feature_2023081410_liqian_add_ad_abtest' into dev

liqian 1 рік тому
батько
коміт
7183cf8114
6 змінених файлів з 736 додано та 151 видалено
  1. 366 60
      ad_recommend.py
  2. 190 0
      config.py
  3. 81 44
      rank_service.py
  4. 41 30
      recommend.py
  5. 57 16
      video_rank.py
  6. 1 1
      video_recall.py

+ 366 - 60
ad_recommend.py

@@ -70,6 +70,277 @@ def get_threshold(abtest_id, abtest_config_tag, ab_test_code, mid_group, care_mo
     return threshold
 
 
+def predict_mid_video_res(now_date, mid, video_id, abtest_param, abtest_id, abtest_config_tag, ab_test_code, care_model_status):
+    now_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+    user_data_key = abtest_param['user'].get('data')
+    user_rule_key = abtest_param['user'].get('rule')
+    video_data_key = abtest_param['video'].get('data')
+    group_class_key = abtest_param.get('group_class_key')
+    no_ad_mid_group_list = abtest_param.get('no_ad_mid_group_list', [])
+
+    # Determine which group the mid belongs to
+    mid_group_key_name = f"{config_.KEY_NAME_PREFIX_MID_GROUP}{group_class_key}:{mid}"
+    mid_group = redis_helper.get_data_from_redis(key_name=mid_group_key_name)
+    if mid_group is None:
+        mid_group = 'mean_group'  # default group when redis has no entry for this mid
+
+    # Check whether the user's group is in the no-ad group list
+    if mid_group in no_ad_mid_group_list:
+        # The group is in the no-ad list, so do not show an ad
+        ad_predict = 1
+        result = {
+            'mid_group': mid_group,
+            'ad_predict': ad_predict
+        }
+    else:
+        # Fetch the user group's share rate (uses yesterday's key when today's key does not exist)
+        group_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{user_data_key}:{user_rule_key}:{now_dt}"
+        if not redis_helper.key_exists(group_share_rate_key):
+            redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+            group_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{user_data_key}:{user_rule_key}:{redis_dt}"
+        group_share_rate = redis_helper.get_score_with_value(key_name=group_share_rate_key, value=mid_group)
+        # Fetch the video's share rate (uses yesterday's key when today's key does not exist)
+        video_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{video_data_key}:{now_dt}"
+        if not redis_helper.key_exists(video_share_rate_key):
+            redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+            video_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{video_data_key}:{redis_dt}"
+        video_share_rate = redis_helper.get_score_with_value(key_name=video_share_rate_key, value=int(video_id))
+        if video_share_rate is None:
+            video_share_rate = redis_helper.get_score_with_value(key_name=video_share_rate_key, value=-1)  # retry with the entry stored under -1
+
+        # Compute the mid-video share rate
+        if group_share_rate is None or video_share_rate is None:
+            return None  # a required rate is missing, so no prediction is made
+        mid_video_predict_res = float(group_share_rate) * float(video_share_rate)
+
+        # Fetch the corresponding threshold
+        threshold = get_threshold(
+            abtest_id=abtest_id,
+            abtest_config_tag=abtest_config_tag,
+            ab_test_code=ab_test_code,
+            mid_group=mid_group,
+            care_model_status=care_model_status,
+            abtest_param=abtest_param
+        )
+        # Compare the prediction against the threshold
+        if mid_video_predict_res > threshold:
+            # Above the threshold: show an ad
+            ad_predict = 2
+        else:
+            # Otherwise: do not show an ad
+            ad_predict = 1
+        result = {
+            'mid_group': mid_group,
+            'group_share_rate': group_share_rate,
+            'video_share_rate': video_share_rate,
+            'mid_video_predict_res': mid_video_predict_res,
+            'threshold': threshold,
+            'ad_predict': ad_predict}
+    return result
+
+
+def predict_mid_video_res_with_add(now_date, mid, video_id, abtest_param, abtest_id, abtest_config_tag, ab_test_code, care_model_status):
+    now_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+
+    # Determine which group the mid belongs to
+    group_class_key = abtest_param.get('group_class_key')
+    mid_group_key_name = f"{config_.KEY_NAME_PREFIX_MID_GROUP}{group_class_key}:{mid}"
+    mid_group = redis_helper.get_data_from_redis(key_name=mid_group_key_name)
+    if mid_group is None:
+        mid_group = 'mean_group'  # default group when redis has no entry for this mid
+
+    # Check whether the user's group is in the no-ad group list
+    no_ad_mid_group_list = abtest_param.get('no_ad_mid_group_list', [])
+    if mid_group in no_ad_mid_group_list:
+        # The group is in the no-ad list, so do not show an ad
+        ad_predict = 1
+        result = {
+            'mid_group': mid_group,
+            'ad_predict': ad_predict
+        }
+    else:
+        # Fetch the user group's probability of sharing after an ad is shown
+        share_user_data_key = abtest_param['share']['user'].get('data')
+        share_user_rule_key = abtest_param['share']['user'].get('rule')
+        group_share_rate_key = \
+            f"{config_.KEY_NAME_PREFIX_AD_GROUP}{share_user_data_key}:{share_user_rule_key}:{now_dt}"
+        if not redis_helper.key_exists(group_share_rate_key):
+            redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+            group_share_rate_key = \
+                f"{config_.KEY_NAME_PREFIX_AD_GROUP}{share_user_data_key}:{share_user_rule_key}:{redis_dt}"
+        group_share_rate = redis_helper.get_score_with_value(key_name=group_share_rate_key, value=mid_group)
+
+        # Fetch the video's probability of being shared after an ad is shown
+        share_video_data_key = abtest_param['share']['video'].get('data')
+        video_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{share_video_data_key}:{now_dt}"
+        if not redis_helper.key_exists(video_share_rate_key):
+            redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+            video_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{share_video_data_key}:{redis_dt}"
+        video_share_rate = redis_helper.get_score_with_value(key_name=video_share_rate_key, value=int(video_id))
+        if video_share_rate is None:
+            video_share_rate = redis_helper.get_score_with_value(key_name=video_share_rate_key, value=-1)  # retry with the entry stored under -1
+
+        # Fetch the user group's probability of not bouncing directly after an ad is shown
+        out_user_data_key = abtest_param['out']['user'].get('data')
+        out_user_rule_key = abtest_param['out']['user'].get('rule')
+        group_out_rate_key = \
+            f"{config_.KEY_NAME_PREFIX_AD_GROUP}{out_user_data_key}:{out_user_rule_key}:{now_dt}"
+        if not redis_helper.key_exists(group_out_rate_key):
+            redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+            group_out_rate_key = \
+                f"{config_.KEY_NAME_PREFIX_AD_GROUP}{out_user_data_key}:{out_user_rule_key}:{redis_dt}"
+        group_out_rate = redis_helper.get_score_with_value(key_name=group_out_rate_key, value=mid_group)
+
+        # Fetch the video's probability of not bouncing directly after an ad is shown
+        out_video_data_key = abtest_param['out']['video'].get('data')
+        video_out_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{out_video_data_key}:{now_dt}"
+        if not redis_helper.key_exists(video_out_rate_key):
+            redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+            video_out_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{out_video_data_key}:{redis_dt}"
+        video_out_rate = redis_helper.get_score_with_value(key_name=video_out_rate_key, value=int(video_id))
+        if video_out_rate is None:
+            video_out_rate = redis_helper.get_score_with_value(key_name=video_out_rate_key, value=-1)  # retry with the entry stored under -1
+
+        # Compute the mid-video prediction value
+        if group_share_rate is None or video_share_rate is None or group_out_rate is None or video_out_rate is None:
+            return None  # a required rate is missing, so no prediction is made
+        # Weighted-sum fusion of the share and not-bounce rates
+        share_weight = abtest_param['mix_param']['share_weight']
+        out_weight = abtest_param['mix_param']['out_weight']
+        group_rate = share_weight * float(group_share_rate) + out_weight * float(group_out_rate)
+        video_rate = share_weight * float(video_share_rate) + out_weight * float(video_out_rate)
+        mid_video_predict_res = group_rate * video_rate
+
+        # Fetch the corresponding threshold
+        threshold = get_threshold(
+            abtest_id=abtest_id,
+            abtest_config_tag=abtest_config_tag,
+            ab_test_code=ab_test_code,
+            mid_group=mid_group,
+            care_model_status=care_model_status,
+            abtest_param=abtest_param
+        )
+        # Compare the prediction against the threshold
+        if mid_video_predict_res > threshold:
+            # Above the threshold: show an ad
+            ad_predict = 2
+        else:
+            # Otherwise: do not show an ad
+            ad_predict = 1
+        result = {
+            'mid_group': mid_group,
+            'group_share_rate': group_share_rate,
+            'video_share_rate': video_share_rate,
+            'group_out_rate': group_out_rate,
+            'video_out_rate': video_out_rate,
+            'group_rate': group_rate,
+            'video_rate': video_rate,
+            'mid_video_predict_res': mid_video_predict_res,
+            'threshold': threshold,
+            'ad_predict': ad_predict}
+    return result
+
+
+def predict_mid_video_res_with_multiply(now_date, mid, video_id, abtest_param, abtest_id, abtest_config_tag, ab_test_code, care_model_status):
+    now_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+
+    # Determine which group the mid belongs to
+    group_class_key = abtest_param.get('group_class_key')
+    mid_group_key_name = f"{config_.KEY_NAME_PREFIX_MID_GROUP}{group_class_key}:{mid}"
+    mid_group = redis_helper.get_data_from_redis(key_name=mid_group_key_name)
+    if mid_group is None:
+        mid_group = 'mean_group'  # default group when redis has no entry for this mid
+
+    # Check whether the user's group is in the no-ad group list
+    no_ad_mid_group_list = abtest_param.get('no_ad_mid_group_list', [])
+    if mid_group in no_ad_mid_group_list:
+        # The group is in the no-ad list, so do not show an ad
+        ad_predict = 1
+        result = {
+            'mid_group': mid_group,
+            'ad_predict': ad_predict
+        }
+    else:
+        # Fetch the user group's probability of sharing after an ad is shown
+        share_user_data_key = abtest_param['share']['user'].get('data')
+        share_user_rule_key = abtest_param['share']['user'].get('rule')
+        group_share_rate_key = \
+            f"{config_.KEY_NAME_PREFIX_AD_GROUP}{share_user_data_key}:{share_user_rule_key}:{now_dt}"
+        if not redis_helper.key_exists(group_share_rate_key):
+            redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+            group_share_rate_key = \
+                f"{config_.KEY_NAME_PREFIX_AD_GROUP}{share_user_data_key}:{share_user_rule_key}:{redis_dt}"
+        group_share_rate = redis_helper.get_score_with_value(key_name=group_share_rate_key, value=mid_group)
+
+        # Fetch the video's probability of being shared after an ad is shown
+        share_video_data_key = abtest_param['share']['video'].get('data')
+        video_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{share_video_data_key}:{now_dt}"
+        if not redis_helper.key_exists(video_share_rate_key):
+            redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+            video_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{share_video_data_key}:{redis_dt}"
+        video_share_rate = redis_helper.get_score_with_value(key_name=video_share_rate_key, value=int(video_id))
+        if video_share_rate is None:
+            video_share_rate = redis_helper.get_score_with_value(key_name=video_share_rate_key, value=-1)  # retry with the entry stored under -1
+
+        # Fetch the user group's probability of not bouncing directly after an ad is shown
+        out_user_data_key = abtest_param['out']['user'].get('data')
+        out_user_rule_key = abtest_param['out']['user'].get('rule')
+        group_out_rate_key = \
+            f"{config_.KEY_NAME_PREFIX_AD_GROUP}{out_user_data_key}:{out_user_rule_key}:{now_dt}"
+        if not redis_helper.key_exists(group_out_rate_key):
+            redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+            group_out_rate_key = \
+                f"{config_.KEY_NAME_PREFIX_AD_GROUP}{out_user_data_key}:{out_user_rule_key}:{redis_dt}"
+        group_out_rate = redis_helper.get_score_with_value(key_name=group_out_rate_key, value=mid_group)
+
+        # Fetch the video's probability of not bouncing directly after an ad is shown
+        out_video_data_key = abtest_param['out']['video'].get('data')
+        video_out_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{out_video_data_key}:{now_dt}"
+        if not redis_helper.key_exists(video_out_rate_key):
+            redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+            video_out_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{out_video_data_key}:{redis_dt}"
+        video_out_rate = redis_helper.get_score_with_value(key_name=video_out_rate_key, value=int(video_id))
+        if video_out_rate is None:
+            video_out_rate = redis_helper.get_score_with_value(key_name=video_out_rate_key, value=-1)  # retry with the entry stored under -1
+
+        # Compute the mid-video prediction value
+        if group_share_rate is None or video_share_rate is None or group_out_rate is None or video_out_rate is None:
+            return None  # a required rate is missing, so no prediction is made
+        # Product fusion of the share and not-bounce rates
+        group_rate = float(group_share_rate) * float(group_out_rate)
+        video_rate = float(video_share_rate) * float(video_out_rate)
+        mid_video_predict_res = group_rate * video_rate
+
+        # Fetch the corresponding threshold
+        threshold = get_threshold(
+            abtest_id=abtest_id,
+            abtest_config_tag=abtest_config_tag,
+            ab_test_code=ab_test_code,
+            mid_group=mid_group,
+            care_model_status=care_model_status,
+            abtest_param=abtest_param
+        )
+        # Compare the prediction against the threshold
+        if mid_video_predict_res > threshold:
+            # Above the threshold: show an ad
+            ad_predict = 2
+        else:
+            # Otherwise: do not show an ad
+            ad_predict = 1
+        result = {
+            'mid_group': mid_group,
+            'group_share_rate': group_share_rate,
+            'video_share_rate': video_share_rate,
+            'group_out_rate': group_out_rate,
+            'video_out_rate': video_out_rate,
+            'group_rate': group_rate,
+            'video_rate': video_rate,
+            'mid_video_predict_res': mid_video_predict_res,
+            'threshold': threshold,
+            'ad_predict': ad_predict}
+    return result
+
+
 def ad_recommend_predict(app_type, mid, video_id, ab_exp_info, ab_test_code, care_model_status):
     """
     广告推荐预测
@@ -83,7 +354,7 @@ def ad_recommend_predict(app_type, mid, video_id, ab_exp_info, ab_test_code, car
     """
     try:
         now_date = datetime.datetime.today()
-        now_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+        # now_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
         # 获取实验参数
         abtest_id, abtest_config_tag = get_params(ab_exp_info=ab_exp_info, ab_test_code=ab_test_code)
         if abtest_id is None or abtest_config_tag is None:
@@ -92,70 +363,105 @@ def ad_recommend_predict(app_type, mid, video_id, ab_exp_info, ab_test_code, car
         if abtest_param is None:
             return None
 
-        user_data_key = abtest_param['user'].get('data')
-        user_rule_key = abtest_param['user'].get('rule')
-        video_data_key = abtest_param['video'].get('data')
-        group_class_key = abtest_param.get('group_class_key')
-        no_ad_mid_group_list = abtest_param.get('no_ad_mid_group_list', [])
-
-        # 判断mid所属分组
-        mid_group_key_name = f"{config_.KEY_NAME_PREFIX_MID_GROUP}{group_class_key}:{mid}"
-        mid_group = redis_helper.get_data_from_redis(key_name=mid_group_key_name)
-        if mid_group is None:
-            mid_group = 'mean_group'
-
-        # 判断用户是否在免广告用户组列表中
-        if mid_group in no_ad_mid_group_list:
-            # 在免广告用户组列表中,则不出广告
-            ad_predict = 1
-            result = {
-                'mid_group': mid_group,
-                'ad_predict': ad_predict
-            }
+        threshold_mix_func = abtest_param.get('threshold_mix_func', None)
+        if threshold_mix_func == 'add':
+            result = predict_mid_video_res_with_add(
+                now_date=now_date,
+                mid=mid,
+                video_id=video_id,
+                abtest_param=abtest_param,
+                abtest_id=abtest_id,
+                abtest_config_tag=abtest_config_tag,
+                ab_test_code=ab_test_code,
+                care_model_status=care_model_status
+            )
+        elif threshold_mix_func == 'multiply':
+            result = predict_mid_video_res_with_multiply(
+                now_date=now_date,
+                mid=mid,
+                video_id=video_id,
+                abtest_param=abtest_param,
+                abtest_id=abtest_id,
+                abtest_config_tag=abtest_config_tag,
+                ab_test_code=ab_test_code,
+                care_model_status=care_model_status
+            )
         else:
-            # 获取用户组分享率
-            group_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{user_data_key}:{user_rule_key}:{now_dt}"
-            if not redis_helper.key_exists(group_share_rate_key):
-                redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
-                group_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{user_data_key}:{user_rule_key}:{redis_dt}"
-            group_share_rate = redis_helper.get_score_with_value(key_name=group_share_rate_key, value=mid_group)
-            # 获取视频分享率
-            video_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{video_data_key}:{now_dt}"
-            if not redis_helper.key_exists(video_share_rate_key):
-                redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
-                video_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{video_data_key}:{redis_dt}"
-            video_share_rate = redis_helper.get_score_with_value(key_name=video_share_rate_key, value=int(video_id))
-            if video_share_rate is None:
-                video_share_rate = redis_helper.get_score_with_value(key_name=video_share_rate_key, value=-1)
-
-            # 计算 mid-video 分享率
-            if group_share_rate is None or video_share_rate is None:
-                return None
-            mid_video_share_rate = float(group_share_rate) * float(video_share_rate)
-
-            # 获取对应的阈值
-            threshold = get_threshold(
+            result = predict_mid_video_res(
+                now_date=now_date,
+                mid=mid,
+                video_id=video_id,
+                abtest_param=abtest_param,
                 abtest_id=abtest_id,
                 abtest_config_tag=abtest_config_tag,
                 ab_test_code=ab_test_code,
-                mid_group=mid_group,
-                care_model_status=care_model_status,
-                abtest_param=abtest_param
+                care_model_status=care_model_status
             )
-            # 阈值判断
-            if mid_video_share_rate > threshold:
-                # 大于阈值,出广告
-                ad_predict = 2
-            else:
-                # 否则,不出广告
-                ad_predict = 1
-            result = {
-                'mid_group': mid_group,
-                'group_share_rate': group_share_rate,
-                'video_share_rate': video_share_rate,
-                'mid_video_share_rate': mid_video_share_rate,
-                'threshold': threshold,
-                'ad_predict': ad_predict}
+
+        # user_data_key = abtest_param['user'].get('data')
+        # user_rule_key = abtest_param['user'].get('rule')
+        # video_data_key = abtest_param['video'].get('data')
+        # group_class_key = abtest_param.get('group_class_key')
+        # no_ad_mid_group_list = abtest_param.get('no_ad_mid_group_list', [])
+        #
+        # # 判断mid所属分组
+        # mid_group_key_name = f"{config_.KEY_NAME_PREFIX_MID_GROUP}{group_class_key}:{mid}"
+        # mid_group = redis_helper.get_data_from_redis(key_name=mid_group_key_name)
+        # if mid_group is None:
+        #     mid_group = 'mean_group'
+        #
+        # # 判断用户是否在免广告用户组列表中
+        # if mid_group in no_ad_mid_group_list:
+        #     # 在免广告用户组列表中,则不出广告
+        #     ad_predict = 1
+        #     result = {
+        #         'mid_group': mid_group,
+        #         'ad_predict': ad_predict
+        #     }
+        # else:
+        #     # 获取用户组分享率
+        #     group_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{user_data_key}:{user_rule_key}:{now_dt}"
+        #     if not redis_helper.key_exists(group_share_rate_key):
+        #         redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+        #         group_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{user_data_key}:{user_rule_key}:{redis_dt}"
+        #     group_share_rate = redis_helper.get_score_with_value(key_name=group_share_rate_key, value=mid_group)
+        #     # 获取视频分享率
+        #     video_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{video_data_key}:{now_dt}"
+        #     if not redis_helper.key_exists(video_share_rate_key):
+        #         redis_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
+        #         video_share_rate_key = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{video_data_key}:{redis_dt}"
+        #     video_share_rate = redis_helper.get_score_with_value(key_name=video_share_rate_key, value=int(video_id))
+        #     if video_share_rate is None:
+        #         video_share_rate = redis_helper.get_score_with_value(key_name=video_share_rate_key, value=-1)
+        #
+        #     # 计算 mid-video 分享率
+        #     if group_share_rate is None or video_share_rate is None:
+        #         return None
+        #     mid_video_share_rate = float(group_share_rate) * float(video_share_rate)
+        #
+        #     # 获取对应的阈值
+        #     threshold = get_threshold(
+        #         abtest_id=abtest_id,
+        #         abtest_config_tag=abtest_config_tag,
+        #         ab_test_code=ab_test_code,
+        #         mid_group=mid_group,
+        #         care_model_status=care_model_status,
+        #         abtest_param=abtest_param
+        #     )
+        #     # 阈值判断
+        #     if mid_video_share_rate > threshold:
+        #         # 大于阈值,出广告
+        #         ad_predict = 2
+        #     else:
+        #         # 否则,不出广告
+        #         ad_predict = 1
+        #     result = {
+        #         'mid_group': mid_group,
+        #         'group_share_rate': group_share_rate,
+        #         'video_share_rate': video_share_rate,
+        #         'mid_video_share_rate': mid_video_share_rate,
+        #         'threshold': threshold,
+        #         'ad_predict': ad_predict}
 
         return result
 

+ 190 - 0
config.py

@@ -955,6 +955,25 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group', 'return0share1mids'],
         },  # 所有广告类型数据 + 优化阈值计算方式 + [else, return0share1mids]非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '173-p': {
+            'share': {'video': {'data': 'videos0'}, 'user': {'data': 'user0', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos0out'}, 'user': {'data': 'user0out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'add',
+            'mix_param': {'share_weight': 0.3, 'out_weight': 0.7}
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案一(加权融合: k1*p(不直接跳出|出广告) + k2*p(分享|出广告))
+        '173-q': {
+            'share': {'video': {'data': 'videos0'}, 'user': {'data': 'user0', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos0out'}, 'user': {'data': 'user0out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'multiply',
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案二(乘积融合: p(不直接跳出|出广告) * p(分享|出广告))
 
         # 票圈视频+
         # '190-a': {
@@ -1011,6 +1030,25 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '190-i': {
+            'share': {'video': {'data': 'videos21'}, 'user': {'data': 'user21', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos21out'}, 'user': {'data': 'user21out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'add',
+            'mix_param': {'share_weight': 0.3, 'out_weight': 0.7}
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案一(加权融合: k1*p(不直接跳出|出广告) + k2*p(分享|出广告))
+        '190-j': {
+            'share': {'video': {'data': 'videos21'}, 'user': {'data': 'user21', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos21out'}, 'user': {'data': 'user21out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'multiply',
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案二(乘积融合: p(不直接跳出|出广告) * p(分享|出广告))
 
         # 票圈视频
         # '194-a': {
@@ -1081,6 +1119,25 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '194-k': {
+            'share': {'video': {'data': 'videos4'}, 'user': {'data': 'user4', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos4out'}, 'user': {'data': 'user4out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'add',
+            'mix_param': {'share_weight': 0.3, 'out_weight': 0.7}
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案一(加权融合: k1*p(不直接跳出|出广告) + k2*p(分享|出广告))
+        '194-l': {
+            'share': {'video': {'data': 'videos4'}, 'user': {'data': 'user4', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos4out'}, 'user': {'data': 'user4out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'multiply',
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案二(乘积融合: p(不直接跳出|出广告) * p(分享|出广告))
 
         # 内容精选
         # '195-a': {
@@ -1145,6 +1202,25 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group', 'return0share1mids'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + [else, return0share1mids]非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '195-j': {
+            'share': {'video': {'data': 'videos5'}, 'user': {'data': 'user5', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos5out'}, 'user': {'data': 'user5out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'add',
+            'mix_param': {'share_weight': 0.3, 'out_weight': 0.7}
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案一(加权融合: k1*p(不直接跳出|出广告) + k2*p(分享|出广告))
+        '195-k': {
+            'share': {'video': {'data': 'videos5'}, 'user': {'data': 'user5', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos5out'}, 'user': {'data': 'user5out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'multiply',
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案二(乘积融合: p(不直接跳出|出广告) * p(分享|出广告))
 
         # 票圈短视频
         # '196-a': {
@@ -1201,6 +1277,25 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '196-i': {
+            'share': {'video': {'data': 'videos6'}, 'user': {'data': 'user6', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos6out'}, 'user': {'data': 'user6out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'add',
+            'mix_param': {'share_weight': 0.3, 'out_weight': 0.7}
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案一(加权融合: k1*p(不直接跳出|出广告) + k2*p(分享|出广告))
+        '196-j': {
+            'share': {'video': {'data': 'videos6'}, 'user': {'data': 'user6', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos6out'}, 'user': {'data': 'user6out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'multiply',
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案二(乘积融合: p(不直接跳出|出广告) * p(分享|出广告))
 
         # 老好看视频
         # '197-a': {
@@ -1257,6 +1352,25 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '197-i': {
+            'share': {'video': {'data': 'videos18'}, 'user': {'data': 'user18', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos18out'}, 'user': {'data': 'user18out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'add',
+            'mix_param': {'share_weight': 0.3, 'out_weight': 0.7}
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案一(加权融合: k1*p(不直接跳出|出广告) + k2*p(分享|出广告))
+        '197-j': {
+            'share': {'video': {'data': 'videos18'}, 'user': {'data': 'user18', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos18out'}, 'user': {'data': 'user18out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'multiply',
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案二(乘积融合: p(不直接跳出|出广告) * p(分享|出广告))
 
         # 票圈最惊奇
         # '198-a': {
@@ -1313,6 +1427,25 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '198-i': {
+            'share': {'video': {'data': 'videos19'}, 'user': {'data': 'user19', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos19out'}, 'user': {'data': 'user19out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'add',
+            'mix_param': {'share_weight': 0.3, 'out_weight': 0.7}
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案一(加权融合: k1*p(不直接跳出|出广告) + k2*p(分享|出广告))
+        '198-j': {
+            'share': {'video': {'data': 'videos19'}, 'user': {'data': 'user19', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos19out'}, 'user': {'data': 'user19out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'multiply',
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案二(乘积融合: p(不直接跳出|出广告) * p(分享|出广告))
 
         # 票圈足迹
         # '242-a': {
@@ -1357,6 +1490,25 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '242-g': {
+            'share': {'video': {'data': 'videos22'}, 'user': {'data': 'user22', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos22out'}, 'user': {'data': 'user22out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'add',
+            'mix_param': {'share_weight': 0.3, 'out_weight': 0.7}
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案一(加权融合: k1*p(不直接跳出|出广告) + k2*p(分享|出广告))
+        '242-h': {
+            'share': {'video': {'data': 'videos22'}, 'user': {'data': 'user22', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos22out'}, 'user': {'data': 'user22out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'multiply',
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案二(乘积融合: p(不直接跳出|出广告) * p(分享|出广告))
 
         # 票圈福年
         # '243-a': {
@@ -1401,6 +1553,25 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group'],
         },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '243-g': {
+            'share': {'video': {'data': 'videos3'}, 'user': {'data': 'user3', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos3out'}, 'user': {'data': 'user3out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'add',
+            'mix_param': {'share_weight': 0.3, 'out_weight': 0.7}
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案一(加权融合: k1*p(不直接跳出|出广告) + k2*p(分享|出广告))
+        '243-h': {
+            'share': {'video': {'data': 'videos3'}, 'user': {'data': 'user3', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos3out'}, 'user': {'data': 'user3out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'multiply',
+        },  # 所有广告类型本端视频数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案二(乘积融合: p(不直接跳出|出广告) * p(分享|出广告))
 
         # 票圈|信仰之路
         '324-a': {
@@ -1427,6 +1598,25 @@ class BaseConfig(object):
             'care_model_status_param': 1,
             'care_model_ab_mid_group': ['mean_group'],
         },  # vlog端所有广告类型数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 使用以是否直接跳出为目标的数据
+        '324-d': {
+            'share': {'video': {'data': 'videos0'}, 'user': {'data': 'user0', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos0out'}, 'user': {'data': 'user0out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'add',
+            'mix_param': {'share_weight': 0.3, 'out_weight': 0.7}
+        },  # vlog端所有广告类型数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案一(加权融合: k1*p(不直接跳出|出广告) + k2*p(分享|出广告))
+        '324-e': {
+            'share': {'video': {'data': 'videos0'}, 'user': {'data': 'user0', 'rule': 'rule2'}},
+            'out': {'video': {'data': 'videos0out'}, 'user': {'data': 'user0out', 'rule': 'rule2'}},
+            'group_class_key': 'class1',
+            'no_ad_mid_group_list': NO_AD_MID_GROUP_LIST['class1'],
+            'care_model_status_param': 1,
+            'care_model_ab_mid_group': ['mean_group'],
+            'threshold_mix_func': 'multiply',
+        },  # vlog端所有广告类型数据 + 优化阈值计算方式 + else非关怀模式人群多出广告 + 分享与不直接跳出融合方案二(乘积融合: p(不直接跳出|出广告) * p(分享|出广告))
 
     }
 

+ 81 - 44
rank_service.py

@@ -8,7 +8,7 @@ log_ = Log()
 
 
 config_ = set_config()
-def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
+def get_featurs(mid, data, size, top_K, flow_pool_P, rec_recall_vid_list, env_dict=None, video_static_info=None, video_hour_static_info=None):
     feature_dict = {}
     # default value
     apptype = 4
@@ -73,13 +73,12 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     day_share_users = []
     recommendsource_list = []
     relevant_video_list = []
-    recall_list = env_dict.get('recall_list', [])
     city_list = []
     province_list = []
-    if recall_list and len(recall_list)>0:
-        for i in range(len(recall_list)):
+    if rec_recall_vid_list and len(rec_recall_vid_list)>0:
+        for i in range(len(rec_recall_vid_list)):
             mid_list.append(mid)
-            videoid_list.append(int(recall_list[i]))
+            videoid_list.append(rec_recall_vid_list[i])
             apptype_list.append(apptype)
             pagesource_list.append(pagesource)
             versioncode_list.append(versioncode)
@@ -91,9 +90,6 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
             city_list.append(city_code)
             province_list.append(province_code)
             relevant_video_list.append(relevant_video_id)
-    video_static_info = env_dict.get('vid_day_fea_list', [])
-    video_hour_static_info = env_dict.get('vid_hour_fea_list', [])
-    #print("video_static_info:",video_static_info)
     if video_static_info and len(video_static_info)>0:
         for i in range(len(video_static_info)):
             try:
@@ -104,13 +100,13 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
                     day_share_score_list.append(float(per_video_staic[2]))
                     day_return_rate_list.append(float(per_video_staic[3]))
                     day_ctr_score_list.append(float(per_video_staic[4]))
-                    # day_view_users_list.append(int(per_video_staic[5]))
-                    # day_view_pv_list.append(int(per_video_staic[6]))
-                    # day_play_users_list.append(int(per_video_staic[7]))
-                    # day_play_pv_list.append(int(per_video_staic[8]))
-                    # day_share_users.append(int(per_video_staic[9]))
-                    # day_share_pv_list.append(int(per_video_staic[10]))
-                    # day_return_users_list.append(int(per_video_staic[11]))
+                    day_view_users_list.append(int(per_video_staic[5]))
+                    day_view_pv_list.append(int(per_video_staic[6]))
+                    day_play_users_list.append(int(per_video_staic[7]))
+                    day_play_pv_list.append(int(per_video_staic[8]))
+                    day_share_users.append(int(per_video_staic[9]))
+                    day_share_pv_list.append(int(per_video_staic[10]))
+                    day_return_users_list.append(int(per_video_staic[11]))
                 else:
                     insert_static_default_fea(day_ctr_score_list, day_play_pv_list, day_play_users_list,
                                               day_return_rate_list, day_return_users_list, day_rov_list,
@@ -122,7 +118,7 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
                                           day_share_pv_list, day_share_return_score_list, day_share_score_list,
                                           day_share_users, day_view_pv_list, day_view_users_list)
     else:
-        for i in range(len(recall_list)):
+        for i in range(len(rec_recall_vid_list)):
             insert_static_default_fea(day_ctr_score_list, day_play_pv_list, day_play_users_list,
                                       day_return_rate_list, day_return_users_list, day_rov_list,
                                       day_share_pv_list, day_share_return_score_list, day_share_score_list,
@@ -132,6 +128,13 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     hour_share_score_list =[]
     hour_return_rate_list = []
     hour_ctr_score_list = []
+    hour_play_pv_list = []
+    hour_play_users_list = []
+    hour_return_users_list = []
+    hour_share_pv_list = []
+    hour_view_pv_list = []
+    hour_view_users_list = []
+    hour_share_users = []
 
     if video_hour_static_info and len(video_hour_static_info)>0:
         for i in range(len(video_hour_static_info)):
@@ -143,20 +146,41 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
                     hour_share_score_list.append(float(per_hour_video_staic[2]))
                     hour_return_rate_list.append(float(per_hour_video_staic[3]))
                     hour_ctr_score_list.append(float(per_hour_video_staic[4]))
+                    hour_view_users_list.append(int(per_hour_video_staic[5]))
+                    hour_view_pv_list.append(int(per_hour_video_staic[6]))
+                    hour_play_users_list.append(int(per_hour_video_staic[7]))
+                    hour_play_pv_list.append(int(per_hour_video_staic[8]))
+                    hour_share_users.append(int(per_hour_video_staic[9]))
+                    hour_share_pv_list.append(int(per_hour_video_staic[10]))
+                    hour_return_users_list.append(int(per_hour_video_staic[11]))
                 else:
                     hour_rov_list.append(0.0)
                     hour_share_return_score_list.append(0.0)
                     hour_share_score_list.append(0.0)
                     hour_return_rate_list.append(0.0)
                     hour_ctr_score_list.append(0.0)
+                    hour_play_pv_list.append(0)
+                    hour_play_users_list.append(0)
+                    hour_return_users_list.append(0)
+                    hour_share_pv_list.append(0)
+                    hour_view_pv_list.append(0)
+                    hour_view_users_list.append(0)
+                    hour_share_users.append(0)
             except Exception:
                     hour_rov_list.append(0.0)
                     hour_share_return_score_list.append(0.0)
                     hour_share_score_list.append(0.0)
                     hour_return_rate_list.append(0.0)
                     hour_ctr_score_list.append(0.0)
+                    hour_play_pv_list.append(0)
+                    hour_play_users_list.append(0)
+                    hour_return_users_list.append(0)
+                    hour_share_pv_list.append(0)
+                    hour_view_pv_list.append(0)
+                    hour_view_users_list.append(0)
+                    hour_share_users.append(0)
     else:
-        for i in range(len(recall_list)):
+        for i in range(len(rec_recall_vid_list)):
             hour_rov_list.append(0.0)
             hour_share_return_score_list.append(0.0)
             hour_share_score_list.append(0.0)
@@ -178,18 +202,25 @@ def get_featurs(mid, data, size, top_K, flow_pool_P, env_dict=None):
     feature_dict["day_share_score_list"] = day_share_score_list
     feature_dict["day_return_rate_list"] = day_return_rate_list
     feature_dict["day_ctr_score_list"] = day_ctr_score_list
-    # feature_dict["day_play_pv_list"] = day_play_pv_list
-    # feature_dict["day_play_users_list"] = day_play_users_list
-    # feature_dict["day_return_users_list"] = day_return_users_list
-    # feature_dict["day_share_pv_list"] = day_share_pv_list
-    # feature_dict["day_view_pv_list"] = day_view_pv_list
-    # feature_dict["day_view_users_list"] = day_view_users_list
-    # feature_dict["day_share_users_list"] = day_share_users
+    feature_dict["day_play_pv_list"] = day_play_pv_list
+    feature_dict["day_play_users_list"] = day_play_users_list
+    feature_dict["day_return_users_list"] = day_return_users_list
+    feature_dict["day_share_pv_list"] = day_share_pv_list
+    feature_dict["day_view_pv_list"] = day_view_pv_list
+    feature_dict["day_view_users_list"] = day_view_users_list
+    feature_dict["day_share_users_list"] = day_share_users
     feature_dict["hour_rov_list"] = hour_rov_list
     feature_dict["hour_share_return_score_list"] = hour_share_return_score_list
     feature_dict["hour_share_score_list"] = hour_share_score_list
     feature_dict["hour_return_rate_list"] = hour_return_rate_list
     feature_dict["hour_ctr_score_list"] = hour_ctr_score_list
+    feature_dict["hour_play_pv_list"] = hour_play_pv_list
+    feature_dict["hour_play_users_list"] = hour_play_users_list
+    feature_dict["hour_return_users_list"] = hour_return_users_list
+    feature_dict["hour_share_pv_list"] = hour_share_pv_list
+    feature_dict["hour_view_pv_list"] = hour_view_pv_list
+    feature_dict["hour_view_users_list"] = hour_view_users_list
+    feature_dict["hour_share_users_list"] = hour_share_users
     feature_dict["city_code"] = city_list
     feature_dict["province_code"] = province_list
     feature_dict["relevant_video_id"] = relevant_video_list
@@ -204,13 +235,13 @@ def insert_static_default_fea(day_ctr_score_list, day_play_pv_list, day_play_use
     day_share_score_list.append(0.0)
     day_return_rate_list.append(0.0)
     day_ctr_score_list.append(0.0)
-    # day_view_users_list.append(0)
-    # day_view_pv_list.append(0)
-    # day_play_users_list.append(0)
-    # day_play_pv_list.append(0)
-    # day_share_users.append(0)
-    # day_share_pv_list.append(0)
-    # day_return_users_list.append(0)
+    day_view_users_list.append(0)
+    day_view_pv_list.append(0)
+    day_play_users_list.append(0)
+    day_play_pv_list.append(0)
+    day_share_users.append(0)
+    day_share_pv_list.append(0)
+    day_return_users_list.append(0)
 
 
 def get_tf_serving_sores(feature_dict):
@@ -231,18 +262,25 @@ def get_tf_serving_sores(feature_dict):
                     "day_share_score": feature_dict["day_share_score_list"],
                     "day_return_rate": feature_dict["day_return_rate_list"],
                     "day_ctr_score": feature_dict["day_ctr_score_list"],
-                    # "day_play_pv": feature_dict["day_play_pv_list"],
-                    # "day_play_users": feature_dict["day_play_users_list"],
-                    # "day_share_pv": feature_dict["day_share_pv_list"],
-                    # "day_return_users": feature_dict["day_return_users_list"],
-                    # "day_share_users": feature_dict["day_share_users_list"],
-                    # "day_view_pv": feature_dict["day_view_pv_list"],
-                    # "day_view_users":feature_dict["day_view_users_list"],
+                    "day_play_pv": feature_dict["day_play_pv_list"],
+                    "day_play_users": feature_dict["day_play_users_list"],
+                    "day_share_pv": feature_dict["day_share_pv_list"],
+                    "day_return_users": feature_dict["day_return_users_list"],
+                    "day_share_users": feature_dict["day_share_users_list"],
+                    "day_view_pv": feature_dict["day_view_pv_list"],
+                    "day_view_users":feature_dict["day_view_users_list"],
                     "hour_rov": feature_dict["hour_rov_list"],
                     "hour_share_score": feature_dict["hour_share_score_list"],
-                    #"hour_share_return_score": feature_dict["hour_share_return_score_list"],
-                    #"hour_return_rate": feature_dict["hour_return_rate_list"],
-                    #"hour_ctr_score": feature_dict["hour_ctr_score_list"],
+                    "hour_share_return_score": feature_dict["hour_share_return_score_list"],
+                    "hour_return_rate": feature_dict["hour_return_rate_list"],
+                    "hour_ctr_score": feature_dict["hour_ctr_score_list"],
+                    "hour_play_pv": feature_dict["hour_play_pv_list"],
+                    "hour_play_users": feature_dict["hour_play_users_list"],
+                    "hour_share_pv": feature_dict["hour_share_pv_list"],
+                    "hour_return_users": feature_dict["hour_return_users_list"],
+                    "hour_share_users": feature_dict["hour_share_users_list"],
+                    "hour_view_pv": feature_dict["hour_view_pv_list"],
+                    "hour_view_users": feature_dict["hour_view_users_list"],
                     "city_code": feature_dict['city_code'],
                     "province_code": feature_dict['province_code'],
                     "relevant_video_id":feature_dict['relevant_video_id']
@@ -254,10 +292,9 @@ def get_tf_serving_sores(feature_dict):
     #print(request_data)
     # 调用http接口
     result = request_post_data(config_.TF_SERVING_URL,request_data, timeout=(0.1, 1))
-
-    # print("result:", result)
+    #print("result:", result)
     if result is None:
-        print("result is None")
+        #print("result is None")
         log_.info('call tf serving error,types: {}')
         return []
     #print(result)

+ 41 - 30
recommend.py

@@ -624,29 +624,22 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
                 'flow_pool_recall': recall_result_list[2]
             }
     # 3. 特征回流
-    rec_recall_list = []
-    vidKeys = []
-    hour_vidKeys = []
-    pre_str = "v_ctr:"
-    pre_hour_str = "v_hour_ctr:"
-    rec_recall_item_list = []
-    for recall_item in data['rov_pool_recall']:
-        if len(recall_item) <= 0:
-            continue
-        vid = recall_item.get("videoId", 0)
-        rec_recall_list.append(vid)
-        vidKeys.append(pre_str + str(vid))
-        hour_vidKeys.append(pre_hour_str + str(vid))
-        rec_recall_item_list.append(recall_item)
-    redisObj = RedisHelper()
-    video_static_info = redisObj.get_batch_key(vidKeys)
-    video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
-    vid_day_fea_list = []
-    vid_hour_fea_list = []
-    if video_static_info:
-        vid_day_fea_list = video_static_info
-    if video_hour_static_info:
-        vid_hour_fea_list = video_hour_static_info
+    #
+    # for recall_item in data['rov_pool_recall']:
+    #     if len(recall_item) <= 0:
+    #         continue
+    #     vid = recall_item.get("videoId", 0)
+    #     rec_recall_list.append(vid)
+    #     rec_recall_item_list.append(recall_item)
+    # redisObj = RedisHelper()
+    # video_static_info = redisObj.get_batch_key(vidKeys)
+    # video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
+    # vid_day_fea_list = []
+    # vid_hour_fea_list = []
+    # if video_static_info:
+    #     vid_day_fea_list = video_static_info
+    # if video_hour_static_info:
+    #     vid_hour_fea_list = video_hour_static_info
     if env_dict:
         province_code = client_info.get('provinceCode', -1)
         if province_code and province_code == "":
@@ -657,21 +650,39 @@ def video_old_recommend(request_id, mid, uid, size, top_K, flow_pool_P, app_type
         env_dict['mid'] = mid
         env_dict['province_code'] = province_code
         env_dict['city_code'] = city_code
-
-        env_dict['recall_list'] = rec_recall_list
-        env_dict['vid_day_fea_list'] = vid_day_fea_list
-        env_dict['vid_hour_fea_list'] = vid_hour_fea_list
         env_json = env_dict
     #4.
-    rank_result, flow_num  = video_new_rank2(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P), ab_code=ab_code, mid=mid, exp_config=exp_config, env_dict=env_dict, rec_recall_item_list=rec_recall_item_list)
+    rank_result, flow_num  = video_new_rank2(data=data, size=size, top_K=top_K, flow_pool_P=float(flow_pool_P), ab_code=ab_code, mid=mid, exp_config=exp_config, env_dict=env_dict)
     #print(rank_result)
     if rank_result:
         result['rank_num'] = len(rank_result)
+        day_vidKeys = []
+        hour_vidKeys = []
+        rec_recall_list = []
+        pre_str = "v_ctr:"
+        pre_hour_str = "v_hour_ctr:"
+        if env_dict and len(rank_result)>0:
+            for rec_item in rank_result:
+                vid = rec_item.get("videoId", 0)
+                rec_recall_list.append(vid)
+                day_vidKeys.append(pre_str+str(vid))
+                hour_vidKeys.append(pre_hour_str+str(vid))
+            redisObj = RedisHelper()
+            video_static_info = redisObj.get_batch_key(day_vidKeys)
+            video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
+            vid_day_fea_list = []
+            vid_hour_fea_list = []
+            if video_static_info:
+                 vid_day_fea_list = video_static_info
+            if video_hour_static_info:
+                 vid_hour_fea_list = video_hour_static_info
+            env_dict['recall_list'] = rec_recall_list
+            env_dict['vid_day_fea_list'] = vid_day_fea_list
+            env_dict['vid_hour_fea_list'] = vid_hour_fea_list
+            env_json = env_dict
     result['rankResult'] = rank_result
     result['flow_num'] = flow_num
     result['rankTime'] = (time.time() - start_rank) * 1000
-
-
     return result, env_json
     # return rank_result, last_rov_recall_key
 

+ 57 - 16
video_rank.py

@@ -593,7 +593,7 @@ def video_rank_with_old_video(rank_result, old_video_recall, size, top_K, old_vi
     return new_rank_result[:size]
 
 
-def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=None, env_dict=None, rec_recall_item_list=None):
+def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=None, env_dict=None):
     """
         视频分发排序
         :param data: 各路召回的视频 type-dict {'rov_pool_recall': [], 'flow_pool_recall': []}
@@ -602,14 +602,14 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=Non
         :param flow_pool_P: size-top_K视频为流量池视频的概率 type-float
         :return: rank_result
         """
-    if not rec_recall_item_list and not data['flow_pool_recall']:
+    if not data['rov_pool_recall'] and not data['flow_pool_recall']:
         return [], 0
 
     #全量的是vlog,票圈精选, 334,60057,
     # 60054: simrecall,
     pre_str = "k_p2:"
-    #print("pre_str:", pre_str)
-    rov_recall_rank = rec_recall_item_list
+    rov_recall_rank = data['rov_pool_recall']
+    #print(rov_recall_rank)
     #call rank service
     #flag_call_service = 0
     sort_index = 0
@@ -618,33 +618,74 @@ def video_new_rank2(data, size, top_K, flow_pool_P, ab_code, mid, exp_config=Non
     #print("sort_index:", sort_index)
     redisObj = RedisHelper()
     vidKeys = []
-    for recall_item in rec_recall_item_list:
-        vid = recall_item.get("videoId", 0)
-        vidKeys.append(pre_str + str(vid))
+    rec_recall_item_list = []
+    rec_recall_vid_list = []
+    day_vidKeys = []
+    hour_vidKeys = []
+    pre_day_str = "v_ctr:"
+    pre_hour_str = "v_hour_ctr:"
+    for recall_item in data['rov_pool_recall']:
+        try:
+            vid = int(recall_item.get("videoId", 0))
+            rec_recall_vid_list.append(vid)
+            rec_recall_item_list.append(recall_item)
+            vidKeys.append(pre_str + str(vid))
+            day_vidKeys.append(pre_day_str+str(vid))
+            hour_vidKeys.append(pre_hour_str+str(vid))
+        except:
+            continue
     video_scores = redisObj.get_batch_key(vidKeys)
-    if ab_code == 60066 or ab_code == 60069 or ab_code == 60070 or ab_code == 60071:
-        feature_dict = get_featurs(mid, data, size, top_K, flow_pool_P, env_dict)
+    #print("video_scores:", video_scores)
+    if (ab_code == 60066 or ab_code == 60069 or ab_code == 60070 or ab_code == 60071) and len(rec_recall_vid_list)>0:
+        video_static_info = redisObj.get_batch_key(day_vidKeys)
+        video_hour_static_info = redisObj.get_batch_key(hour_vidKeys)
+        #print("env_dict:", env_dict)
+        feature_dict = get_featurs(mid, data, size, top_K, flow_pool_P, rec_recall_vid_list,env_dict, video_static_info, video_hour_static_info)
         score_result = get_tf_serving_sores(feature_dict)
+
+        #print("score_result:", score_result)
         if video_scores and len(video_scores)>0  and rec_recall_item_list and score_result and len(score_result) > 0\
                 and len(score_result) == len(rec_recall_item_list) and len(video_scores)== len(score_result):
             for i in range(len(score_result)):
                 try:
                     if video_scores[i] is None and len(score_result[i])>0:
                         return_score = 0.000000001
-                        total_score = return_score * score_result[i][0]
+                        # sort_index == 10: use the model score alone
+                        if sort_index == 10:
+                            total_score = score_result[i][0]
+                        else:
+                            total_score = return_score * score_result[i][0]
                         rec_recall_item_list[i]['sort_score'] = total_score
+                        rec_recall_item_list[i]['base_rov_score'] = 0.0
+                        rec_recall_item_list[i]['share_score'] = return_score
+                        rec_recall_item_list[i]['model_score'] = score_result[i][0]
                     else:
                         video_score_str = json.loads(video_scores[i])
-                        if len(video_score_str)>= sort_index and  len(video_score_str)>0:
-                            return_score = video_score_str[sort_index]
+                        # sort_index == 10: use the model score alone
+                        return_score = 0.000000001
+                        if sort_index == 10:
+                            total_score = score_result[i][0]
                         else:
-                            return_score = 0.000000001
-                        total_score = return_score * score_result[i][0]
+                            if len(video_score_str)>= sort_index and  len(video_score_str)>0:
+                                return_score = video_score_str[sort_index]
+                            total_score = return_score * score_result[i][0]
+                            #print("total_score:", total_score, " model score :", score_result[i][0], "return_score:",
+                             #     return_score)
                         rec_recall_item_list[i]['sort_score'] = total_score
-                except Exception:
+                        rec_recall_item_list[i]['base_rov_score'] = video_score_str[0]
+                        rec_recall_item_list[i]['share_score'] = return_score
+                        rec_recall_item_list[i]['model_score'] = score_result[i][0]
+                except Exception as e:
+                    #print('exception: {}:', e)
                     return_score = 0.000000001
-                    total_score = return_score * 0.00000001
+                    if sort_index == 10:
+                        total_score = 0.00000001
+                    else:
+                        total_score = return_score * 0.00000001
                     rec_recall_item_list[i]['sort_score'] = total_score
+                    rec_recall_item_list[i]['base_rov_score'] = 0
+                    rec_recall_item_list[i]['share_score'] = return_score
+                    rec_recall_item_list[i]['model_score'] = 0.00000001
                 rec_recall_item_list[i]['flag_call_service'] = 1
             rov_recall_rank = sorted(rec_recall_item_list, key=lambda k: k.get('sort_score', 0), reverse=True)
         else:

+ 1 - 1
video_recall.py

@@ -2694,7 +2694,7 @@ class PoolRecall(object):
             return None
 
     def get_sort_ab_codel_config(self):
-        ab_key = "sort_ab_config"
+        ab_key = "sort_ab_config2"
         data = self.redis_helper.get_data_from_redis(key_name=ab_key)
         if data is not None:
             try: