"""Estimate with-ad share rates for user groups and videos and export them.

Running this module as a script computes, for every video, one predicted
value per user group (video rate * group rate) and writes the table to
``./data/ad_user_video_predict.csv``.
"""
import datetime

import pandas as pd
from odps import ODPS

from utils import data_check, get_feature_data, send_msg_to_feishu
from config import set_config
from log import Log

config_, _ = set_config()
log_ = Log()


def _predict_ad_share_rate(dt, app_type, source_key, id_column, aggregate_id, result_column):
    """Shared implementation behind the user-group and video estimators.

    Args:
        dt: Value forwarded unchanged to ``get_feature_data`` as ``dt``.
        app_type: Only rows whose ``apptype`` (cast to int) equals this value
            are kept.
        source_key: Key into ``config_.ad_model_data`` naming the feature
            source; its ``project`` and ``table`` entries are read.
        id_column: Column identifying each entity (``'group'`` or
            ``'videoid'``).
        aggregate_id: Value of ``id_column`` marking the row that aggregates
            over all entities (``'allmids'`` or ``'allvideos'``).
        result_column: Name of the column that receives the estimate.

    Returns:
        The filtered feature frame without the aggregate row, extended with
        ``result_column`` computed as
        ``sharerate_ad * <aggregate sharerate_ad> / sharerate_all``.

    Raises:
        IndexError: If no aggregate row exists for ``app_type``. The original
            code raised the same exception type here, without a message that
            identified the missing data.
    """
    # Fetch the raw features.
    source = config_.ad_model_data[source_key]
    features = ['apptype', id_column, 'sharerate_all', 'sharerate_ad']
    feature_df = get_feature_data(
        project=source.get('project'),
        table=source.get('table'),
        features=features,
        dt=dt,
    )
    feature_df['apptype'] = feature_df['apptype'].astype(int)

    # Work on an explicit copy: assigning columns onto a boolean-mask
    # selection is what triggers pandas' SettingWithCopyWarning.
    feature_df = feature_df[feature_df['apptype'] == app_type].copy()
    feature_df['sharerate_all'] = feature_df['sharerate_all'].astype(float)
    feature_df['sharerate_ad'] = feature_df['sharerate_ad'].astype(float)

    # With-ad share rate over all entities; the original comment describes it
    # as covering the last 30 days (not verifiable from this file).
    aggregate_rates = feature_df.loc[feature_df[id_column] == aggregate_id, 'sharerate_ad']
    if aggregate_rates.empty:
        raise IndexError(
            f"no row with {id_column} == {aggregate_id!r} in {source_key} "
            f"for dt={dt}, app_type={app_type}"
        )
    overall_ad_share_rate = float(aggregate_rates.iloc[0])

    feature_df = feature_df[feature_df[id_column] != aggregate_id].copy()

    # Per-entity estimate of the share rate when ads are shown.
    # NOTE(review): rows whose sharerate_all is 0 produce inf/NaN here, exactly
    # as before - confirm whether upstream guarantees non-zero values.
    feature_df[result_column] = (
        feature_df['sharerate_ad'] * overall_ad_share_rate / feature_df['sharerate_all']
    )
    return feature_df


def predict_user_group_share_rate(dt, app_type):
    """Estimate the with-ad share rate of each user group.

    Args:
        dt: Value forwarded to ``get_feature_data`` as ``dt``.
        app_type: App type whose rows are kept.

    Returns:
        Frame with one row per user group (the ``'allmids'`` aggregate row is
        removed) and an added ``group_ad_share_rate`` column.

    Raises:
        IndexError: If the ``'allmids'`` row is missing for ``app_type``.
    """
    return _predict_ad_share_rate(
        dt=dt,
        app_type=app_type,
        source_key='users_share_rate',
        id_column='group',
        aggregate_id='allmids',
        result_column='group_ad_share_rate',
    )


def predict_video_share_rate(dt, app_type):
    """Estimate the with-ad share rate of each video.

    Args:
        dt: Value forwarded to ``get_feature_data`` as ``dt``.
        app_type: App type whose rows are kept.

    Returns:
        Frame with one row per video (the ``'allvideos'`` aggregate row is
        removed) and an added ``video_ad_share_rate`` column.

    Raises:
        IndexError: If the ``'allvideos'`` row is missing for ``app_type``.
    """
    return _predict_ad_share_rate(
        dt=dt,
        app_type=app_type,
        source_key='videos_share_rate',
        id_column='videoid',
        aggregate_id='allvideos',
        result_column='video_ad_share_rate',
    )


def predict_ad_group_video():
    """Build the video x user-group prediction table and write it to CSV.

    Uses today's date (formatted ``YYYYMMDD``) as ``dt`` and the ``VLOG`` entry
    of ``config_.APP_TYPE`` as the app type. For every user group a column
    named after the group is added to the video frame, holding
    ``video_ad_share_rate * group_ad_share_rate``.

    Returns:
        The video frame extended with one column per user group. The same
        frame is written to ``./data/ad_user_video_predict.csv``.
    """
    app_type = config_.APP_TYPE['VLOG']
    now_date = datetime.datetime.today()
    dt = now_date.strftime('%Y%m%d')

    user_group_df = predict_user_group_share_rate(dt=dt, app_type=app_type)
    video_df = predict_video_share_rate(dt=dt, app_type=app_type)
    print(f"user_group_df count = {len(user_group_df)}, \nvideo_df count = {len(video_df)}")

    # Columns are added directly onto the video frame, one per user group.
    predict_df = video_df
    group_rates = zip(user_group_df['group'], user_group_df['group_ad_share_rate'])
    for group_name, group_rate in group_rates:
        predict_df[group_name] = predict_df['video_ad_share_rate'] * group_rate

    predict_df.to_csv('./data/ad_user_video_predict.csv')
    return predict_df


if __name__ == '__main__':
    predict_df = predict_ad_group_video()