|
@@ -7,8 +7,12 @@ import pandas as pd
|
|
|
import xgboost as xgb
|
|
|
|
|
|
from client import ODPSClient
|
|
|
+from config import ConfigManager
|
|
|
+from helper import RedisHelper
|
|
|
+from util import feishu_inform_util
|
|
|
|
|
|
odps_client = ODPSClient.ODPSClient()
|
|
|
+config_manager = ConfigManager.ConfigManager()
|
|
|
|
|
|
features_name = ['1_vov0', '2_vov0', '3_vov0', '4_vov0', '5_vov0', '2_vov01', '3_vov01', '4_vov01', '5_vov01',
|
|
|
'3_vov012', '4_vov012', '5_vov012', "12_change", "23_change", "34_change", '2_vov01', '3_vov01',
|
|
@@ -22,7 +26,7 @@ column_names = ['曝光占比', 'vov0', '分子', '分母', '1_vov0', '2_vov0',
|
|
|
'4_vov012_分子', '4_vov012_分母', '5_vov012_分子', '5_vov012_分母']
|
|
|
|
|
|
# 创建一个logger
|
|
|
-logger = logging.getLogger("xgboost_train.py")
|
|
|
+logger = logging.getLogger("vov_xgboost_train.py")
|
|
|
logger.setLevel(logging.INFO) # 设置日志级别
|
|
|
|
|
|
# 创建Handler用于输出到文件
|
|
@@ -251,6 +255,7 @@ def xgb_multi_dt_data(t_1_label_dt: datetime):
|
|
|
|
|
|
t_3_label_dt = t_1_label_dt - timedelta(2)
|
|
|
logger.info(f"VOV模型特征数据处理:t_3_label_future.label_datetime: {t_3_label_dt.strftime('%Y%m%d')}")
|
|
|
+
|
|
|
t_3_label_future = executor.submit(fetch_data, t_3_label_dt, t_3_label_dt - timedelta(1), t_3_label_dt)
|
|
|
t_1_label_df = t_1_label_future.result()
|
|
|
t_2_label_df = t_2_label_future.result()
|
|
@@ -261,7 +266,7 @@ def xgb_multi_dt_data(t_1_label_dt: datetime):
|
|
|
|
|
|
def _main():
|
|
|
logger.info(f"XGB模型训练")
|
|
|
- train_df = xgb_multi_dt_data((datetime.now() - timedelta(days=2)))
|
|
|
+ train_df = xgb_multi_dt_data((datetime.now() - timedelta(days=3)))
|
|
|
trains_array = train_df[features_name].values
|
|
|
trains_label_array = train_df['label'].values
|
|
|
|
|
@@ -283,8 +288,8 @@ def _main():
|
|
|
model.fit(trains_array, trains_label_array)
|
|
|
|
|
|
logger.info("获取评测数据")
|
|
|
- start_label_datetime = datetime.now() - timedelta(days=1)
|
|
|
- feature_start_datetime = start_label_datetime - timedelta(1)
|
|
|
+ start_label_datetime = datetime.now() - timedelta(days=2)
|
|
|
+ feature_start_datetime = start_label_datetime
|
|
|
|
|
|
predict_df = fetch_data(start_label_datetime, feature_start_datetime, start_label_datetime)
|
|
|
tests_array = predict_df[features_name].values
|
|
@@ -293,19 +298,19 @@ def _main():
|
|
|
condition_choose = (
|
|
|
(predict_df['y_pred'] <= 0.1) &
|
|
|
(
|
|
|
- (predict_df['4_vov0_分母'] > 50) |
|
|
|
(predict_df['2_vov0_分母'] > 50) |
|
|
|
- (predict_df['3_vov0_分母'] > 50)
|
|
|
+ (predict_df['3_vov0_分母'] > 50) |
|
|
|
+ (predict_df['4_vov0_分母'] > 50)
|
|
|
) &
|
|
|
(
|
|
|
- (predict_df['1_vov0'] - predict_df['2_vov0'] <= 0.1)
|
|
|
+ (predict_df['1_vov0'] - predict_df['2_vov0'] < 0.1)
|
|
|
)
|
|
|
)
|
|
|
profit_threshold = 0.3
|
|
|
condition_choose_real = condition_choose & (predict_df['vov0'] <= profit_threshold)
|
|
|
predict_df["condition_choose"] = condition_choose
|
|
|
predict_df[["vid", "曝光占比", "vov0", "condition_choose"]].to_csv(
|
|
|
- "new_" + (datetime.now() - timedelta(days=1)).strftime("%Y%m%d"),
|
|
|
+ "./file/new_" + (datetime.now() - timedelta(days=1)).strftime("%Y%m%d"),
|
|
|
sep="\t",
|
|
|
index=False
|
|
|
)
|
|
@@ -339,12 +344,63 @@ def _main():
|
|
|
f"盈利效率:{round(profit_loss_value / profit_value, 6)}"
|
|
|
)
|
|
|
|
|
|
- filtered_vid = predict_df.loc[condition_choose_real, 'vid'].unique()
|
|
|
- print(f"要过滤掉的视频ID为: {filtered_vid}")
|
|
|
+ filtered_vid = predict_df.loc[condition_choose, 'vid'].unique()
|
|
|
+
|
|
|
+ # 写入Redis
|
|
|
+ redis_key = f"redis:lower_vov_vid:{datetime.now().strftime('%Y%m%d')}"
|
|
|
+
|
|
|
+ logger.info(f"当前环境为: {config_manager.get_env()}, 要写入的Redis Key为: {redis_key}")
|
|
|
+ host, port, password = config_manager.get_algorithm_redis_info()
|
|
|
+ alg_redis = RedisHelper.RedisHelper(host=host, port=port, password=password)
|
|
|
+ for vid in filtered_vid.tolist():
|
|
|
+ alg_redis.add_number_to_set(redis_key, vid)
|
|
|
+
|
|
|
+ alg_redis.set_expire(redis_key, 86400)
|
|
|
|
|
|
|
|
|
def build_inform_card(env, succeeded):
    """Build the Feishu card payload announcing the outcome of a training run.

    Args:
        env: Environment name to embed in the message body (the caller passes
            ``config_manager.get_env()``).
        succeeded: ``True`` to build the "info" card for a finished run,
            ``False`` to build the "error" card for a failed run.

    Returns:
        A dict with the ``config`` / ``i18n_elements`` / ``i18n_header``
        layout that is handed to ``feishu_inform_util.send_card_msg_to_feishu``.
    """
    if succeeded:
        title = "XGB模型训练预测完成"
        template = "info"
        msg_text = (
            "\n- 所属项目: model_monitor"
            f"\n- 所属环境: {env}"
            "\n- 告警描述: VOV预测模型训练和预测完成, 用于低VOV视频过滤"
        )
    else:
        title = "XGB模型训练预测失败"
        template = "error"
        # NOTE(review): the failure card names the project "rov-offline" while
        # the success card says "model_monitor" -- confirm which one is intended.
        msg_text = (
            "\n- 所属项目: rov-offline"
            "\n- 告警名称: XGB模型训练失败"
            f"\n- 所属环境: {env}"
            "\n- 告警描述: VOV预测模型训练和预测失败, 用于低VOV视频过滤"
        )

    return {
        "config": {},
        "i18n_elements": {
            "zh_cn": [
                {
                    "tag": "markdown",
                    "content": msg_text,
                    "text_align": "left",
                    "text_size": "normal",
                },
            ],
        },
        "i18n_header": {
            "zh_cn": {
                "title": {
                    "tag": "plain_text",
                    "content": title,
                },
                "template": template,
            },
        },
    }


if __name__ == '__main__':
    # Run the training/prediction job, then always report the outcome to Feishu.
    try:
        # The job must actually run here: with this call commented out the
        # "completed" card would be sent without any training or prediction.
        _main()
        succeeded = True
    except Exception as e:
        # logger.exception records the traceback; the explicit %s placeholder
        # is required so that logging can format the extra argument.
        logger.exception("VOV过滤XGB模型训练异常: %s", e)
        succeeded = False

    # Send the notification for both the success and the failure outcome.
    feishu_inform_util.send_card_msg_to_feishu(
        webhook=config_manager.get_vov_model_inform_feishu_webhook(),
        card_json=build_inform_card(config_manager.get_env(), succeeded),
    )
|