Bladeren bron

update ad_user_data_update_with_new_strategy

liqian 1 jaar geleden
bovenliggende
commit
aa206c3b1f
2 gewijzigde bestanden met toevoegingen van 78 en 24 verwijderingen
  1. 73 21
      ad_user_data_update_with_new_strategy.py
  2. 5 3
      config.py

+ 73 - 21
ad_user_data_update_with_new_strategy.py

@@ -11,14 +11,17 @@ redis_helper = RedisHelper()
 features = [
     'apptype',
     'group',
-    'adrate',
-    'sharerate',
-    'adrate_share'
+    'ad_type',  # 0: all, 1: 自营,2: 微信
+    'sharerate',  # 分享的概率
+    'no_ad_rate',  # 不出广告的概率
+    'no_adrate_share',  # 分享的情况下且不出广告的概率
+    'ad_rate',  # 出广告的概率
+    'adrate_share',  # 分享的情况下且出广告的概率
 ]
 
 
-def predict_user_group_share_rate(user_group_initial_df, dt, data_params, rule_params, param):
-    """预估用户组对应的有广告时分享率"""
+def predict_user_group_share_rate_with_ad(user_group_initial_df, dt, data_params, rule_params, param):
+    """预估用户组有广告时分享率"""
     # 获取对应的参数
     data_key = param.get('data')
     data_param = data_params.get(data_key)
@@ -27,12 +30,15 @@ def predict_user_group_share_rate(user_group_initial_df, dt, data_params, rule_p
 
     # 获取对应的用户组特征
     user_group_df = user_group_initial_df.copy()
+    # 获取所有广告类型对应的数据
+    user_group_df['ad_type'] = user_group_df['ad_type'].astype(int)
+    user_group_df = user_group_df[user_group_df['ad_type'] == 0]
     user_group_df['apptype'] = user_group_df['apptype'].astype(int)
     user_group_df = user_group_df[user_group_df['apptype'] == data_param]
-    user_group_df['adrate'].fillna(0, inplace=True)
+    user_group_df['ad_rate'].fillna(0, inplace=True)
     user_group_df['sharerate'].fillna(0, inplace=True)
     user_group_df['adrate_share'].fillna(0, inplace=True)
-    user_group_df['adrate'] = user_group_df['adrate'].astype(float)
+    user_group_df['ad_rate'] = user_group_df['ad_rate'].astype(float)
     user_group_df['sharerate'] = user_group_df['sharerate'].astype(float)
     user_group_df['adrate_share'] = user_group_df['adrate_share'].astype(float)
 
@@ -40,18 +46,14 @@ def predict_user_group_share_rate(user_group_initial_df, dt, data_params, rule_p
     user_group_list = rule_param.get('group_list')
     user_group_df = user_group_df[user_group_df['group'].isin(user_group_list)]
 
-    # 去除对应无广告用户组
-    if rule_param.get('remove_no_ad_group') is True:
-        user_group_df = user_group_df[~user_group_df['group'].isin(rule_param.get('no_ad_mid_group_list'))]
-
     # 计算用户组有广告时分享率
-    user_group_df = user_group_df[user_group_df['adrate'] != 0]
+    user_group_df = user_group_df[user_group_df['ad_rate'] != 0]
     user_group_df['group_ad_share_rate'] = \
-        user_group_df['adrate_share'] * user_group_df['sharerate'] / user_group_df['adrate']
+        user_group_df['adrate_share'] * user_group_df['sharerate'] / user_group_df['ad_rate']
     user_group_df['group_ad_share_rate'].fillna(0, inplace=True)
 
     # 结果写入redis
-    key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{data_key}:{rule_key}:{dt}"
+    key_name = f"{config_.KEY_NAME_PREFIX_GROUP_WITH_AD}{data_key}:{rule_key}:{dt}"
     redis_data = {}
     for index, item in user_group_df.iterrows():
         redis_data[item['group']] = item['group_ad_share_rate']
@@ -63,6 +65,51 @@ def predict_user_group_share_rate(user_group_initial_df, dt, data_params, rule_p
     return user_group_df
 
 
+def predict_user_group_share_rate_no_ad(user_group_initial_df, dt, data_params, rule_params, param):
+    """预估用户组无广告时的分享率"""
+    # 获取对应的参数
+    data_key = param.get('data')
+    data_param = data_params.get(data_key)
+    rule_key = param.get('rule')
+    rule_param = rule_params.get(rule_key)
+
+    # 获取对应的用户组特征
+    user_group_df = user_group_initial_df.copy()
+    # 获取所有广告类型对应的数据
+    user_group_df['ad_type'] = user_group_df['ad_type'].astype(int)
+    user_group_df = user_group_df[user_group_df['ad_type'] == 0]
+    user_group_df['apptype'] = user_group_df['apptype'].astype(int)
+    user_group_df = user_group_df[user_group_df['apptype'] == data_param]
+    user_group_df['no_ad_rate'].fillna(0, inplace=True)
+    user_group_df['sharerate'].fillna(0, inplace=True)
+    user_group_df['no_adrate_share'].fillna(0, inplace=True)
+    user_group_df['no_ad_rate'] = user_group_df['no_ad_rate'].astype(float)
+    user_group_df['sharerate'] = user_group_df['sharerate'].astype(float)
+    user_group_df['no_adrate_share'] = user_group_df['no_adrate_share'].astype(float)
+
+    # 获取对应的用户分组数据
+    user_group_list = rule_param.get('group_list')
+    user_group_df = user_group_df[user_group_df['group'].isin(user_group_list)]
+
+    # 计算用户组有广告时分享率
+    user_group_df = user_group_df[user_group_df['ad_rate'] != 0]
+    user_group_df['group_no_ad_share_rate'] = \
+        user_group_df['no_adrate_share'] * user_group_df['sharerate'] / user_group_df['no_ad_rate']
+    user_group_df['group_no_ad_share_rate'].fillna(0, inplace=True)
+
+    # 结果写入redis
+    key_name = f"{config_.KEY_NAME_PREFIX_GROUP_NO_AD}{data_key}:{rule_key}:{dt}"
+    redis_data = {}
+    for index, item in user_group_df.iterrows():
+        redis_data[item['group']] = item['group_no_ad_share_rate']
+    group_ad_share_rate_mean = user_group_df['group_no_ad_share_rate'].mean()
+    redis_data['mean_group'] = group_ad_share_rate_mean
+    if len(redis_data) > 0:
+        redis_helper = RedisHelper()
+        redis_helper.add_data_with_zset(key_name=key_name, data=redis_data, expire_time=2 * 24 * 3600)
+    return user_group_df
+
+
 def update_users_data(project, table, dt, update_params):
     """预估用户组有广告时分享率"""
     # 获取用户组特征
@@ -71,11 +118,16 @@ def update_users_data(project, table, dt, update_params):
     rule_params = update_params.get('rule_params')
     for param in update_params.get('params_list'):
         log_.info(f"param = {param} update start...")
-        predict_user_group_share_rate(user_group_initial_df=user_group_initial_df,
-                                      dt=dt,
-                                      data_params=data_params,
-                                      rule_params=rule_params,
-                                      param=param)
+        predict_user_group_share_rate_with_ad(user_group_initial_df=user_group_initial_df,
+                                              dt=dt,
+                                              data_params=data_params,
+                                              rule_params=rule_params,
+                                              param=param)
+        predict_user_group_share_rate_no_ad(user_group_initial_df=user_group_initial_df,
+                                            dt=dt,
+                                            data_params=data_params,
+                                            rule_params=rule_params,
+                                            param=param)
         log_.info(f"param = {param} update end!")
 
 
@@ -99,11 +151,11 @@ def timer_check():
             Timer(60, timer_check).start()
 
     except Exception as e:
-        log_.error(f"用户组分享率预测数据更新失败, exception: {e}, traceback: {traceback.format_exc()}")
+        log_.error(f"新策略 -- 用户组分享率预测数据更新失败, exception: {e}, traceback: {traceback.format_exc()}")
         send_msg_to_feishu(
             webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
             key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
-            msg_text=f"rov-offline{config_.ENV_TEXT} - 用户组分享率预测数据更新失败\n"
+            msg_text=f"rov-offline{config_.ENV_TEXT} - 新策略 -- 用户组分享率预测数据更新失败\n"
                      f"exception: {e}\n"
                      f"traceback: {traceback.format_exc()}"
         )

+ 5 - 3
config.py

@@ -1125,9 +1125,7 @@ class BaseConfig(object):
         'rule_params': {
             'rule1': {
                 'group_list': AD_MID_GROUP['class1'],
-                'no_ad_mid_group_list': [],
-                'remove_no_ad_group': True,  # mean_group 预测&计算阈值时,去除不出广告的用户组
-            },  # 优化阈值计算方式
+            },
         },
         'params_list': [
             {'data': 'user0', 'rule': 'rule1'},  # 票圈vlog + 优化阈值计算方式
@@ -2280,6 +2278,10 @@ class BaseConfig(object):
     KEY_NAME_PREFIX_VIDEO_WITH_AD = 'video:predict:share:rate:with:ad:'
     # 视频无广告时的分享率预测结果存放 redis key 前缀,完整格式:video:predict:share:rate:no:ad:{video_data_key}:{date}
     KEY_NAME_PREFIX_VIDEO_NO_AD = 'video:predict:share:rate:no:ad:'
+    # 用户组有广告时的分享率预测结果存放 redis key 前缀,完整格式:users:group:predict:share:rate:with:ad:{user_data_key}:{user_rule_key}:{date}
+    KEY_NAME_PREFIX_GROUP_WITH_AD = 'users:group:predict:share:rate:with:ad:'
+    # 用户组无广告时的分享率预测结果存放 redis key 前缀,完整格式:users:group:predict:share:rate:no:ad:{user_data_key}:{user_rule_key}:{date}
+    KEY_NAME_PREFIX_GROUP_NO_AD = 'users:group:predict:share:rate:no:ad:'
 
 
 class DevelopmentConfig(BaseConfig):