@@ -1,11 +1,14 @@
 import datetime
+
+import numpy as np
 import pandas as pd
 from odps import ODPS
-from utils import data_check, get_feature_data, send_msg_to_feishu
+from utils import data_check, get_feature_data, send_msg_to_feishu, RedisHelper
 from config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()
+redis_helper = RedisHelper()


 def predict_user_group_share_rate(dt, app_type):
@@ -59,15 +62,40 @@ def predict_video_share_rate(dt, app_type):


 def predict_ad_group_video():
-    app_type = config_.APP_TYPE['VLOG']
-    now_date = datetime.datetime.today()
+    now_date = datetime.datetime.today() - datetime.timedelta(days=1)
     dt = datetime.datetime.strftime(now_date, '%Y%m%d')
-    user_group_df = predict_user_group_share_rate(dt=dt, app_type=app_type)
-    video_df = predict_video_share_rate(dt=dt, app_type=app_type)
-    print(f"user_group_df count = {len(user_group_df)}, \nvideo_df count = {len(video_df)}")
+    log_.info(f"dt = {dt}")
+    # Get the predicted ad share rate for each user group
+    group_key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{dt}"
+    group_data = redis_helper.get_all_data_from_zset(key_name=group_key_name, with_scores=True)
+    if group_data is None:
+        log_.info(f"group data is None!")
+    group_df = pd.DataFrame(data=group_data, columns=['group', 'group_ad_share_rate'])
+    group_df = group_df[group_df['group'] != 'mean_group']
+    log_.info(f"group_df count = {len(group_df)}")
+    # Get the predicted ad share rate for each video
+    video_key_name = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{dt}"
+    video_data = redis_helper.get_all_data_from_zset(key_name=video_key_name, with_scores=True)
+    if video_data is None:
+        log_.info(f"video data is None!")
+    video_df = pd.DataFrame(data=video_data, columns=['videoid', 'video_ad_share_rate'])
+    video_df = video_df[video_df['videoid'] != -1]
+    log_.info(f"video_df count = {len(video_df)}")
     predict_df = video_df
-    for index, item in user_group_df.iterrows():
+    threshold_data = {}
+    all_group_data = []
+    for index, item in group_df.iterrows():
         predict_df[item['group']] = predict_df['video_ad_share_rate'] * item['group_ad_share_rate']
+        # Use the mean of each group's predictions as that group's threshold
+        threshold_data[item['group']] = predict_df[item['group']].mean()
+        all_group_data.extend(predict_df[item['group']].tolist())
+    threshold_data['mean_group'] = np.mean(all_group_data)
+    log_.info(f"threshold_data = {threshold_data}")
+    # Write the thresholds to Redis
+    for key, val in threshold_data.items():
+        key_name = f"{config_.KEY_NAME_PREFIX_AD_THRESHOLD}{key}"
+        redis_helper.set_data_to_redis(key_name=key_name, value=val, expire_time=2 * 24 * 3600)
+
     predict_df.to_csv('./data/ad_user_video_predict.csv')
     return predict_df
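A minimal sketch of the threshold logic introduced above, assuming get_all_data_from_zset returns the zset members with their scores as (member, score) pairs; the toy values and the .copy() call are illustrative only, not taken from the real keys or data.

import numpy as np
import pandas as pd

# Toy stand-ins for the (member, score) pairs assumed to come back from the Redis zsets.
group_data = [('group1', 0.12), ('group2', 0.08), ('mean_group', 0.10)]
video_data = [(1001, 0.30), (1002, 0.25), (-1, 0.27)]

group_df = pd.DataFrame(data=group_data, columns=['group', 'group_ad_share_rate'])
group_df = group_df[group_df['group'] != 'mean_group']    # drop the aggregate row
video_df = pd.DataFrame(data=video_data, columns=['videoid', 'video_ad_share_rate'])
video_df = video_df[video_df['videoid'] != -1]             # drop the placeholder video

predict_df = video_df.copy()    # .copy() only to keep the sketch warning-free
threshold_data = {}
all_group_data = []
for _, item in group_df.iterrows():
    # Predicted ad share rate of each video for this user group.
    predict_df[item['group']] = predict_df['video_ad_share_rate'] * item['group_ad_share_rate']
    # The per-group mean becomes that group's ad threshold.
    threshold_data[item['group']] = predict_df[item['group']].mean()
    all_group_data.extend(predict_df[item['group']].tolist())
# The mean across all groups is stored under 'mean_group'.
threshold_data['mean_group'] = np.mean(all_group_data)
print(threshold_data)
# approximately {'group1': 0.033, 'group2': 0.022, 'mean_group': 0.0275}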