1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- import datetime
- import pandas as pd
- from odps import ODPS
- from utils import data_check, get_feature_data, send_msg_to_feishu
- from config import set_config
- from log import Log
# Project configuration shared by every function in this module; the second
# value returned by set_config() is not used here.
config_, _ = set_config()

# Module-level logger instance.
log_ = Log()
def predict_user_group_share_rate(dt, app_type):
    """Estimate the with-ad share rate of every user group for one app type.

    Loads the ``users_share_rate`` feature table for partition ``dt``, keeps
    the rows whose ``apptype`` equals ``app_type`` and, for every group other
    than the aggregate ``'allmids'`` row, computes::

        group_ad_share_rate = sharerate_ad * allmids.sharerate_ad / sharerate_all

    Args:
        dt: Partition date handed to ``get_feature_data`` (the caller in this
            file formats it as ``'%Y%m%d'``).
        app_type: App type to keep; compared with the ``apptype`` column after
            that column has been cast to ``int``.

    Returns:
        DataFrame holding the fetched feature columns without the
        ``'allmids'`` row, plus a ``group_ad_share_rate`` column.

    Raises:
        IndexError: If the table has no ``'allmids'`` row for ``app_type``.
    """
    # Fetch the user-group features.
    source = config_.ad_model_data['users_share_rate']
    features = [
        'apptype',
        'group',
        'sharerate_all',
        'sharerate_ad',
    ]
    user_group_df = get_feature_data(
        project=source.get('project'),
        table=source.get('table'),
        features=features,
        dt=dt,
    )
    user_group_df['apptype'] = user_group_df['apptype'].astype(int)

    # Copy after filtering: assigning columns on a boolean-indexed slice
    # triggers pandas' SettingWithCopyWarning and may not write through.
    user_group_df = user_group_df[user_group_df['apptype'] == app_type].copy()
    user_group_df['sharerate_all'] = user_group_df['sharerate_all'].astype(float)
    user_group_df['sharerate_ad'] = user_group_df['sharerate_ad'].astype(float)

    # With-ad share rate across all user groups (the original comment states
    # that the rates cover the last 30 days).
    is_aggregate = user_group_df['group'] == 'allmids'
    aggregate_rates = user_group_df.loc[is_aggregate, 'sharerate_ad']
    if aggregate_rates.empty:
        # IndexError is what the previous `.values[0]` lookup raised; the type
        # is kept so existing callers behave the same, with a usable message.
        raise IndexError(
            f"no 'allmids' row in users_share_rate for app_type={app_type!r}, dt={dt!r}"
        )
    ad_all_group_share_rate = float(aggregate_rates.iloc[0])
    user_group_df = user_group_df[~is_aggregate].copy()

    # Per-group with-ad share rate.
    # NOTE(review): a group whose sharerate_all is 0 yields inf/NaN here, as
    # it did before; confirm whether such rows can occur upstream.
    user_group_df['group_ad_share_rate'] = (
        user_group_df['sharerate_ad']
        * ad_all_group_share_rate
        / user_group_df['sharerate_all']
    )
    return user_group_df
def predict_video_share_rate(dt, app_type):
    """Estimate the with-ad share rate of every video for one app type.

    Loads the ``videos_share_rate`` feature table for partition ``dt``, keeps
    the rows whose ``apptype`` equals ``app_type`` and, for every video other
    than the aggregate ``'allvideos'`` row, computes::

        video_ad_share_rate = sharerate_ad * allvideos.sharerate_ad / sharerate_all

    Args:
        dt: Partition date handed to ``get_feature_data`` (the caller in this
            file formats it as ``'%Y%m%d'``).
        app_type: App type to keep; compared with the ``apptype`` column after
            that column has been cast to ``int``.

    Returns:
        DataFrame holding the fetched feature columns without the
        ``'allvideos'`` row, plus a ``video_ad_share_rate`` column.

    Raises:
        IndexError: If the table has no ``'allvideos'`` row for ``app_type``.
    """
    # Fetch the video features.
    source = config_.ad_model_data['videos_share_rate']
    features = [
        'apptype',
        'videoid',
        'sharerate_all',
        'sharerate_ad',
    ]
    video_df = get_feature_data(
        project=source.get('project'),
        table=source.get('table'),
        features=features,
        dt=dt,
    )
    video_df['apptype'] = video_df['apptype'].astype(int)

    # Copy after filtering: assigning columns on a boolean-indexed slice
    # triggers pandas' SettingWithCopyWarning and may not write through.
    video_df = video_df[video_df['apptype'] == app_type].copy()
    video_df['sharerate_all'] = video_df['sharerate_all'].astype(float)
    video_df['sharerate_ad'] = video_df['sharerate_ad'].astype(float)

    # With-ad share rate across all videos (the original comment states that
    # the rates cover the last 30 days).
    is_aggregate = video_df['videoid'] == 'allvideos'
    aggregate_rates = video_df.loc[is_aggregate, 'sharerate_ad']
    if aggregate_rates.empty:
        # IndexError is what the previous `.values[0]` lookup raised; the type
        # is kept so existing callers behave the same, with a usable message.
        raise IndexError(
            f"no 'allvideos' row in videos_share_rate for app_type={app_type!r}, dt={dt!r}"
        )
    ad_all_videos_share_rate = float(aggregate_rates.iloc[0])
    video_df = video_df[~is_aggregate].copy()

    # Per-video with-ad share rate.
    # NOTE(review): a video whose sharerate_all is 0 yields inf/NaN here, as
    # it did before; confirm whether such rows can occur upstream.
    video_df['video_ad_share_rate'] = (
        video_df['sharerate_ad']
        * ad_all_videos_share_rate
        / video_df['sharerate_all']
    )
    return video_df
def predict_ad_group_video():
    """Predict the with-ad share rate for every (video, user group) pair.

    Uses today's date (``'%Y%m%d'``) as the partition and the ``VLOG`` app
    type from the config, computes the per-group and per-video with-ad share
    rates, then adds one column per user group to the video frame holding
    ``video_ad_share_rate * group_ad_share_rate``. The result is written to
    ``./data/ad_user_video_predict.csv``.

    Returns:
        The video DataFrame extended with one prediction column per user
        group (column name = the group's ``group`` value).
    """
    app_type = config_.APP_TYPE['VLOG']
    dt = datetime.datetime.today().strftime('%Y%m%d')

    user_group_df = predict_user_group_share_rate(dt=dt, app_type=app_type)
    video_df = predict_video_share_rate(dt=dt, app_type=app_type)
    print(f"user_group_df count = {len(user_group_df)}, \nvideo_df count = {len(video_df)}")

    predict_df = video_df
    # Walk the user-group ROWS. Iterating the DataFrame itself yields column
    # labels (plain strings), so the former `item['group']` raised TypeError.
    group_rates = zip(user_group_df['group'], user_group_df['group_ad_share_rate'])
    for group_name, group_ad_share_rate in group_rates:
        # Scale the video's with-ad share rate by the group's rate. The
        # previous code multiplied the `videoid` identifier column instead.
        predict_df[group_name] = predict_df['video_ad_share_rate'] * group_ad_share_rate

    predict_df.to_csv('./data/ad_user_video_predict.csv')
    return predict_df
# Script entry point: run the prediction job once when executed directly.
if __name__ == "__main__":
    predict_df = predict_ad_group_video()
|