|
@@ -0,0 +1,105 @@
|
|
|
+import datetime
|
|
|
+
|
|
|
+import numpy as np
|
|
|
+import pandas as pd
|
|
|
+from odps import ODPS
|
|
|
+from utils import data_check, get_feature_data, send_msg_to_feishu, RedisHelper
|
|
|
+from config import set_config
|
|
|
+from log import Log
|
|
|
# Module-level singletons shared by every function below.
# set_config() returns a pair; only its first element is kept here.
config_, _ = set_config()
# Logger instance used for all progress messages in this module.
log_ = Log()
# Helper (imported from utils) used to read predictions from and write thresholds to Redis.
redis_helper = RedisHelper()
|
|
|
+
|
|
|
+
|
|
|
def predict_user_group_share_rate(dt, app_type):
    """Estimate the with-ad share rate of every user group for one app type.

    Loads the ``apptype``, ``group``, ``sharerate_all`` and ``sharerate_ad``
    features for ``dt`` from the project/table configured under
    ``config_.ad_model_data['users_share_rate']``, keeps the rows whose
    ``apptype`` equals ``app_type``, and computes for every group

        group_ad_share_rate = sharerate_ad * overall_ad_rate / sharerate_all

    where ``overall_ad_rate`` is the ``sharerate_ad`` of the aggregate
    ``'allmids'`` row.

    Args:
        dt: Partition value forwarded unchanged to ``get_feature_data``.
        app_type: App type to keep; compared with ``apptype`` after that
            column has been cast to ``int``.

    Returns:
        pandas.DataFrame: the per-group rows (aggregate ``'allmids'`` row
        removed) with an added ``group_ad_share_rate`` column.

    Raises:
        IndexError: if no ``'allmids'`` row exists for ``app_type``.
    """
    # Fetch the user-group features.
    source = config_.ad_model_data['users_share_rate']
    features = ['apptype', 'group', 'sharerate_all', 'sharerate_ad']
    user_group_df = get_feature_data(
        project=source.get('project'),
        table=source.get('table'),
        features=features,
        dt=dt,
    )
    user_group_df['apptype'] = user_group_df['apptype'].astype(int)

    # Take an explicit copy of the filtered rows: the column assignments below
    # must target an independent frame, not a slice of the loaded one
    # (avoids pandas' chained-assignment warning / ambiguity).
    user_group_df = user_group_df[user_group_df['apptype'] == app_type].copy()
    user_group_df['sharerate_all'] = user_group_df['sharerate_all'].astype(float)
    user_group_df['sharerate_ad'] = user_group_df['sharerate_ad'].astype(float)

    # With-ad share rate over all user groups (a 30-day rate according to the
    # original comment), stored in the aggregate 'allmids' row.
    is_aggregate = user_group_df['group'] == 'allmids'
    aggregate_rates = user_group_df.loc[is_aggregate, 'sharerate_ad']
    if aggregate_rates.empty:
        # Same exception type the bare `.values[0]` lookup used to raise,
        # but with a message that says what is actually missing.
        raise IndexError(
            f"no 'allmids' row found for app_type={app_type}, dt={dt}"
        )
    ad_all_group_share_rate = float(aggregate_rates.iloc[0])

    user_group_df = user_group_df[~is_aggregate].copy()

    # With-ad share rate per user group. Evaluation order (multiply, then
    # divide) is kept as in the original expression. A zero `sharerate_all`
    # yields inf/NaN here rather than raising.
    user_group_df['group_ad_share_rate'] = (
        user_group_df['sharerate_ad'] * ad_all_group_share_rate / user_group_df['sharerate_all']
    )
    return user_group_df
|
|
|
+
|
|
|
+
|
|
|
def predict_video_share_rate(dt, app_type):
    """Estimate the with-ad share rate of every video for one app type.

    Loads the ``apptype``, ``videoid``, ``sharerate_all`` and ``sharerate_ad``
    features for ``dt`` from the project/table configured under
    ``config_.ad_model_data['videos_share_rate']``, keeps the rows whose
    ``apptype`` equals ``app_type``, and computes for every video

        video_ad_share_rate = sharerate_ad * overall_ad_rate / sharerate_all

    where ``overall_ad_rate`` is the ``sharerate_ad`` of the aggregate
    ``'allvideos'`` row.

    Args:
        dt: Partition value forwarded unchanged to ``get_feature_data``.
        app_type: App type to keep; compared with ``apptype`` after that
            column has been cast to ``int``.

    Returns:
        pandas.DataFrame: the per-video rows (aggregate ``'allvideos'`` row
        removed) with an added ``video_ad_share_rate`` column.

    Raises:
        IndexError: if no ``'allvideos'`` row exists for ``app_type``.
    """
    # Fetch the video features.
    source = config_.ad_model_data['videos_share_rate']
    features = ['apptype', 'videoid', 'sharerate_all', 'sharerate_ad']
    video_df = get_feature_data(
        project=source.get('project'),
        table=source.get('table'),
        features=features,
        dt=dt,
    )
    video_df['apptype'] = video_df['apptype'].astype(int)

    # Take an explicit copy of the filtered rows: the column assignments below
    # must target an independent frame, not a slice of the loaded one
    # (avoids pandas' chained-assignment warning / ambiguity).
    video_df = video_df[video_df['apptype'] == app_type].copy()
    video_df['sharerate_all'] = video_df['sharerate_all'].astype(float)
    video_df['sharerate_ad'] = video_df['sharerate_ad'].astype(float)

    # With-ad share rate over all videos (a 30-day rate according to the
    # original comment), stored in the aggregate 'allvideos' row.
    is_aggregate = video_df['videoid'] == 'allvideos'
    aggregate_rates = video_df.loc[is_aggregate, 'sharerate_ad']
    if aggregate_rates.empty:
        # Same exception type the bare `.values[0]` lookup used to raise,
        # but with a message that says what is actually missing.
        raise IndexError(
            f"no 'allvideos' row found for app_type={app_type}, dt={dt}"
        )
    ad_all_videos_share_rate = float(aggregate_rates.iloc[0])

    video_df = video_df[~is_aggregate].copy()

    # With-ad share rate per video. Evaluation order (multiply, then divide)
    # is kept as in the original expression. A zero `sharerate_all` yields
    # inf/NaN here rather than raising.
    video_df['video_ad_share_rate'] = (
        video_df['sharerate_ad'] * ad_all_videos_share_rate / video_df['sharerate_all']
    )
    return video_df
|
|
|
+
|
|
|
+
|
|
|
def predict_ad_group_video():
    """Combine today's group and video predictions and publish thresholds.

    Steps:
      1. Read the zset ``KEY_NAME_PREFIX_AD_GROUP + <today as YYYYMMDD>``
         (group -> group_ad_share_rate) and drop the ``'mean_group'`` entry.
      2. Read the zset ``KEY_NAME_PREFIX_AD_VIDEO + <today as YYYYMMDD>``
         (videoid -> video_ad_share_rate) and drop the ``-1`` entry.
      3. For every group add a column ``video_ad_share_rate *
         group_ad_share_rate`` to the video frame; the column mean is that
         group's threshold, and the mean over all columns' values is stored
         under ``'mean_group'``.
      4. Write each threshold to Redis under
         ``KEY_NAME_PREFIX_AD_THRESHOLD + <group>`` with
         ``expire_time=2 * 24 * 3600``.
      5. Dump the frame to ``./data/ad_user_video_predict.csv``.

    When either input is missing or empty there is nothing to average; the
    thresholds would all be NaN, so in that case nothing is written to Redis
    (previously stored thresholds are left untouched). The CSV is still
    written and the (possibly empty) frame is still returned.

    Returns:
        pandas.DataFrame: one row per video, with ``videoid``,
        ``video_ad_share_rate`` and one product column per group.
    """
    dt = datetime.datetime.today().strftime('%Y%m%d')
    log_.info(f"dt = {dt}")

    # Per-group predictions.
    group_key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{dt}"
    group_data = redis_helper.get_all_data_from_zset(key_name=group_key_name, with_scores=True)
    if group_data is None:
        log_.info("group data is None!")
    # pd.DataFrame(data=None, columns=...) gives an empty frame with these columns.
    group_df = pd.DataFrame(data=group_data, columns=['group', 'group_ad_share_rate'])
    group_df = group_df[group_df['group'] != 'mean_group']
    log_.info(f"group_df count = {len(group_df)}")

    # Per-video predictions.
    video_key_name = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{dt}"
    video_data = redis_helper.get_all_data_from_zset(key_name=video_key_name, with_scores=True)
    if video_data is None:
        log_.info("video data is None!")
    video_df = pd.DataFrame(data=video_data, columns=['videoid', 'video_ad_share_rate'])
    # NOTE(review): this compares against the int -1; if the Redis helper
    # returns members as str/bytes the filter never matches - confirm.
    video_df = video_df[video_df['videoid'] != -1]
    log_.info(f"video_df count = {len(video_df)}")

    # Work on an explicit copy: columns are added below and must not be
    # assigned onto a boolean-filtered slice.
    predict_df = video_df.copy()

    threshold_data = {}
    all_group_data = []
    for group, group_rate in zip(group_df['group'], group_df['group_ad_share_rate']):
        predict_df[group] = predict_df['video_ad_share_rate'] * group_rate
        # The mean of the group's column is used as that group's threshold.
        threshold_data[group] = predict_df[group].mean()
        all_group_data.extend(predict_df[group].tolist())

    if all_group_data:
        threshold_data['mean_group'] = np.mean(all_group_data)
        log_.info(f"threshold_data = {threshold_data}")
        # Publish the thresholds to Redis.
        for key, val in threshold_data.items():
            key_name = f"{config_.KEY_NAME_PREFIX_AD_THRESHOLD}{key}"
            redis_helper.set_data_to_redis(key_name=key_name, value=val, expire_time=2 * 24 * 3600)
    else:
        # No groups or no videos: every mean would be NaN, so keep whatever
        # thresholds are already stored instead of overwriting them.
        log_.info("no group/video predictions to aggregate, thresholds not updated")

    predict_df.to_csv('./data/ad_user_video_predict.csv')
    return predict_df
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Script entry point: run the combined group/video prediction once.
    predict_df = predict_ad_group_video()
|
|
|
+
|