liqian 1 year ago
parent
commit
f3c8a97422
3 changed files with 28 additions and 17 deletions
  1. 7 5
      ad_user_data_with_out_update.py
  2. 18 9
      ad_video_data_update.py
  3. 3 3
      config.py

+ 7 - 5
ad_user_data_with_out_update.py

@@ -18,7 +18,7 @@ features = [
 
 
 def predict_user_group_out_rate(user_group_initial_df, dt, data_params, rule_params, param):
-    """预估用户组对应的有广告时直接跳出的概率"""
+    """预估用户组对应的有广告时直接跳出的概率"""
     # 获取对应的参数
     data_key = param.get('data')
     data_param = data_params.get(data_key)
@@ -44,18 +44,20 @@ def predict_user_group_out_rate(user_group_initial_df, dt, data_params, rule_par
     if rule_param.get('remove_no_ad_group') is True:
         user_group_df = user_group_df[~user_group_df['group'].isin(rule_param.get('no_ad_mid_group_list'))]
 
-    # 计算用户组有广告时分享
+    # 计算用户组有广告时直接跳出的概率
     user_group_df = user_group_df[user_group_df['adrate'] != 0]
     user_group_df['group_ad_out_rate'] = \
         user_group_df['adrate_out'] * user_group_df['outrate'] / user_group_df['adrate']
     user_group_df['group_ad_out_rate'].fillna(0, inplace=True)
-
+    # 计算用户组有广告时不直接跳出的概率
+    user_group_df['group_ad_no_out_rate'] = 1 - user_group_df['group_ad_out_rate']
+    log_.info(f"user_group_df:\n{user_group_df}")
     # 结果写入redis
     key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{data_key}:{rule_key}:{dt}"
     redis_data = {}
     for index, item in user_group_df.iterrows():
-        redis_data[item['group']] = item['group_ad_out_rate']
-    group_ad_out_rate_mean = user_group_df['group_ad_out_rate'].mean()
+        redis_data[item['group']] = item['group_ad_no_out_rate']
+    group_ad_out_rate_mean = user_group_df['group_ad_no_out_rate'].mean()
     redis_data['mean_group'] = group_ad_out_rate_mean
     if len(redis_data) > 0:
         redis_helper = RedisHelper()

+ 18 - 9
ad_video_data_update.py

@@ -194,7 +194,7 @@ def update_videos_data_new(project, table, dt, update_params, top10_abnormal_vid
 
 
 def predict_video_out_rate(video_initial_df, dt, data_key, data_param, top10_abnormal_videos):
-    """预估视频有广告时被直接跳出的概率"""
+    """预估视频有广告时被直接跳出的概率"""
     # 获取对应的视频特征
     video_df = video_initial_df.copy()
     video_df['apptype'] = video_df['apptype'].astype(int)
@@ -216,18 +216,25 @@ def predict_video_out_rate(video_initial_df, dt, data_key, data_param, top10_abn
         # print(len(video_df))
     # 计算视频有广告时被直接跳出的概率
     video_df = video_df[video_df['adrate'] != 0]
+    video_df = video_df[video_df['adrate_out'] != 0]
     video_df['video_ad_out_rate'] = \
         video_df['adrate_out'] * video_df['outrate'] / video_df['adrate']
     video_df['video_ad_out_rate'].fillna(0, inplace=True)
+    # 计算视频有广告时不被直接跳出的概率
+    video_df['video_ad_no_out_rate'] = 1 - video_df['video_ad_out_rate']
+    # print(len(video_df))
+    # video_df = video_df[video_df['video_ad_no_out_rate'] != 0]
     # log_.info(f"video_df: {video_df}")
-    video_df = video_df[video_df['video_ad_out_rate'] != 0]
     log_.info(f"video_df filtered 0 length: {len(video_df)}")
+    # video_df = video_df[video_df['video_ad_no_out_rate'] != 1]
+    # log_.info(f"video_df: {video_df}")
+    # log_.info(f"video_df filtered 0 length: {len(video_df)}")
     # 结果写入redis
     key_name = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{data_key}:{dt}"
     redis_data = {}
     for index, item in video_df.iterrows():
-        redis_data[int(item['videoid'])] = item['video_ad_out_rate']
-    group_ad_out_rate_mean = video_df['video_ad_out_rate'].mean()
+        redis_data[int(item['videoid'])] = item['video_ad_no_out_rate']
+    group_ad_out_rate_mean = video_df['video_ad_no_out_rate'].mean()
     redis_data[-1] = group_ad_out_rate_mean
     # 异常视频给定值:mean/3
     if top10_abnormal_video_ids is not None:
@@ -239,7 +246,7 @@ def predict_video_out_rate(video_initial_df, dt, data_key, data_param, top10_abn
     log_.info(f"redis_data count: {len(redis_data)}")
     if len(redis_data) > 0:
         redis_helper = RedisHelper()
-        redis_helper.add_data_with_zset(key_name=key_name, data=redis_data, expire_time=2 * 24 * 3600)
+        # redis_helper.add_data_with_zset(key_name=key_name, data=redis_data, expire_time=2 * 24 * 3600)
     return video_df
 
 
@@ -264,14 +271,16 @@ def timer_check(dt, video_key, video_params, top10_abnormal_videos):
         log_.info(f"ad video data count = {data_count}")
         # 数据准备好,进行更新
         if video_key == 'videos_data_alladtype':
-            update_videos_data_new(project=project, table=table, dt=dt, update_params=video_params,
-                                   top10_abnormal_videos=top10_abnormal_videos)
+            # update_videos_data_new(project=project, table=table, dt=dt, update_params=video_params,
+            #                        top10_abnormal_videos=top10_abnormal_videos)
+            pass
         elif video_key == 'videos_data_with_out_alladtype':
             update_videos_data_with_out(project=project, table=table, dt=dt, update_params=video_params,
                                         top10_abnormal_videos=top10_abnormal_videos)
         else:
-            update_videos_data(project=project, table=table, dt=dt, update_params=video_params,
-                               top10_abnormal_videos=top10_abnormal_videos)
+            # update_videos_data(project=project, table=table, dt=dt, update_params=video_params,
+            #                    top10_abnormal_videos=top10_abnormal_videos)
+            pass
         log_.info(f"video_key = {video_key} ad video data update end!")
         msg_list = [
             f"env: rov-offline {config_.ENV_TEXT}",

+ 3 - 3
config.py

@@ -984,7 +984,7 @@ class BaseConfig(object):
         ]
     }
 
-    # 新的 - 广告模型用户数据
+    # 新的 - 广告模型用户数据(直接跳出)
     AD_USER_WITH_OUT_PARAMS = {
         'data_params': {
             'user5out': APP_TYPE['LONG_VIDEO'],  # 内容精选
@@ -2285,8 +2285,8 @@ class ProductionConfig(BaseConfig):
 
 def set_config():
     # 获取环境变量 ROV_OFFLINE_ENV
-    env = os.environ.get('ROV_OFFLINE_ENV')
-    # env = 'dev'
+    # env = os.environ.get('ROV_OFFLINE_ENV')
+    env = 'dev'
     if env is None:
         # log_.error('ENV ERROR: is None!')
         return