Explorar o código

update ad_users_data_update

liqian hai 2 anos
pai
achega
7e4bd05560
Modificáronse 2 ficheiros con 42 adicións e 14 borrados
  1. 39 11
      ad_users_data_update.py
  2. 3 3
      config.py

+ 39 - 11
ad_users_data_update.py

@@ -16,28 +16,41 @@ features = [
 ]
 
 
-def predict_user_group_share_rate(user_group_initial_df, dt, data_key, data_param):
+def predict_user_group_share_rate(user_group_initial_df, dt, data_params, rule_params, param):
     """预估用户组对应的有广告时分享率"""
+    # 获取对应的参数
+    data_key = param.get('data')
+    data_param = data_params.get(data_key)
+    rule_key = param.get('rule')
+    rule_param = rule_params.get(rule_key)
+
     # 获取对应的用户组特征
     user_group_df = user_group_initial_df.copy()
     user_group_df['apptype'] = user_group_df['apptype'].astype(int)
     user_group_df = user_group_df[user_group_df['apptype'] == data_param]
-    if data_key == 'data1:1':
-        user_group_df = user_group_df[~user_group_df['group'].isin(config_.NO_AD_MID_GROUP_LIST)]
-
     user_group_df['sharerate_all'].fillna(0, inplace=True)
     user_group_df['sharerate_ad'].fillna(0, inplace=True)
     user_group_df['sharerate_all'] = user_group_df['sharerate_all'].astype(float)
     user_group_df['sharerate_ad'] = user_group_df['sharerate_ad'].astype(float)
+
     # 获取有广告时所有用户组近30天的分享率
     ad_all_group_share_rate = user_group_df[user_group_df['group'] == 'allmids']['sharerate_ad'].values[0]
-    user_group_df = user_group_df[user_group_df['group'] != 'allmids']
+
+    # 获取对应的用户分组数据
+    user_group_list = rule_param.get('group_list')
+    user_group_df = user_group_df[user_group_df['group'].isin(user_group_list)]
+
+    # 去除对应无广告用户组
+    if rule_param.get('remove_no_ad_group') is True:
+        user_group_df = user_group_df[~user_group_df['group'].isin(rule_param.get('no_ad_mid_group_list'))]
+
     # 计算用户组有广告时分享率
     user_group_df['group_ad_share_rate'] = \
         user_group_df['sharerate_ad'] * float(ad_all_group_share_rate) / user_group_df['sharerate_all']
     user_group_df['group_ad_share_rate'].fillna(0, inplace=True)
+
     # 结果写入redis
-    key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{data_key}:{dt}"
+    key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{data_key}:{rule_key}:{dt}"
     redis_data = {}
     for index, item in user_group_df.iterrows():
         redis_data[item['group']] = item['group_ad_share_rate']
@@ -53,15 +66,30 @@ def update_users_data(project, table, dt, update_params):
     """预估用户组有广告时分享率"""
     # 获取用户组特征
     user_group_initial_df = get_feature_data(project=project, table=table, features=features, dt=dt)
-    for data_key, data_param in update_params.items():
-        log_.info(f"data_key = {data_key} update start...")
-        predict_user_group_share_rate(user_group_initial_df=user_group_initial_df, dt=dt, data_key=data_key, data_param=data_param)
-        log_.info(f"data_key = {data_key} update end!")
+    data_params = update_params.get('data_params')
+    rule_params = update_params.get('rule_params')
+    for param in update_params.get('params_list'):
+        log_.info(f"param = {param} update start...")
+        predict_user_group_share_rate(user_group_initial_df=user_group_initial_df,
+                                      dt=dt,
+                                      data_params=data_params,
+                                      rule_params=rule_params,
+                                      param=param)
+        log_.info(f"param = {param} update end!")
+
+
+    # for data_key, data_param in update_params.items():
+    #     log_.info(f"data_key = {data_key} update start...")
+    #     predict_user_group_share_rate(user_group_initial_df=user_group_initial_df,
+    #                                   dt=dt,
+    #                                   data_key=data_key,
+    #                                   data_param=data_param)
+    #     log_.info(f"data_key = {data_key} update end!")
 
 
 def timer_check():
     try:
-        update_params = config_.AD_USER_DATA_PARAMS
+        update_params = config_.AD_USER_PARAMS
         project = config_.ad_model_data['users_share_rate'].get('project')
         table = config_.ad_model_data['users_share_rate'].get('table')
         now_date = datetime.datetime.today()

+ 3 - 3
config.py

@@ -750,7 +750,7 @@ class BaseConfig(object):
                   'threshold': {'group': 5 / 18, 'mean_group': 5 / 18}},
     }
 
-    # 用户组有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:users:group:predict:share:rate:{user_data_key}:{date}
+    # 用户组有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:users:group:predict:share:rate:{user_data_key}:{user_rule_key}:{date}
     KEY_NAME_PREFIX_AD_GROUP = 'ad:users:group:predict:share:rate:'
     # 视频有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:video:predict:share:rate:{video_data_key}:{date}
     KEY_NAME_PREFIX_AD_VIDEO = 'ad:video:predict:share:rate:'
@@ -1084,8 +1084,8 @@ class ProductionConfig(BaseConfig):
 
 def set_config():
     # 获取环境变量 ROV_OFFLINE_ENV
-    env = os.environ.get('ROV_OFFLINE_ENV')
-    # env = 'dev'
+    # env = os.environ.get('ROV_OFFLINE_ENV')
+    env = 'dev'
     if env is None:
         # log_.error('ENV ERROR: is None!')
         return