|
@@ -19,25 +19,29 @@ MODEL_PATH=/dw/recommend/model/35_ad_model_test/
|
|
|
PREDICT_RESULT_SAVE_PATH=/dw/recommend/model/34_ad_predict_data_test/
|
|
|
TABLE=alg_recsys_ad_sample_all
|
|
|
|
|
|
-MODEL_OSS_PATH=oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/zhangbo/
|
|
|
-
|
|
|
-today_early_1="$(date -d '1 days ago' +%Y%m%d)"
|
|
|
-
|
|
|
+# 特征文件名
|
|
|
feature_file=20240703_ad_feature_name.txt
|
|
|
-# 线上模型名
|
|
|
+
|
|
|
+# 模型OSS保存路径,测试时修改为其他路径,避免影响线上
|
|
|
+MODEL_OSS_PATH=oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/model/
|
|
|
+# 线上模型名,测试时修改为其他模型名,避免影响线上
|
|
|
model_name=model_xgb_351_1000_v2_test
|
|
|
+
|
|
|
+today_early_1="$(date -d '1 days ago' +%Y%m%d)"
|
|
|
# 训练用的数据路径
|
|
|
train_data_path=""
|
|
|
# 评估用的数据路径
|
|
|
predict_date_path=""
|
|
|
#评估结果保存路径
|
|
|
-predict_result_path=""
|
|
|
+new_model_predict_result_path=""
|
|
|
# 模型保存路径
|
|
|
model_save_path=""
|
|
|
# 模型本地临时保存路径
|
|
|
model_local_path=/root/zhaohp/XGB
|
|
|
# 任务开始时间
|
|
|
start_time=$(date +%s)
|
|
|
+# 线上模型在HDFS中的路径
|
|
|
+online_model_path=`cat /root/zhaohp/XGB/online_model_path.txt`
|
|
|
|
|
|
# 校验命令的退出码
|
|
|
check_run_status() {
|
|
@@ -59,12 +63,8 @@ check_run_status() {
|
|
|
}
|
|
|
|
|
|
init() {
|
|
|
-
|
|
|
-
|
|
|
- predict_date_path=${BUCKET_FEATURE_PATH}/${today_early_1}
|
|
|
- model_save_path=${MODEL_PATH}/${model_name}_$(date -d +%Y%m%d)
|
|
|
- predict_result_path=${PREDICT_RESULT_SAVE_PATH}/${today_early_1}_351_1000
|
|
|
-
|
|
|
+
|
|
|
+ declare -a date_keys=()
|
|
|
local count=1
|
|
|
local current_data="$(date -d '2 days ago' +%Y%m%d)"
|
|
|
# 循环获取前 n 天的非节日日期
|
|
@@ -72,6 +72,10 @@ init() {
|
|
|
date_key=$(date -d "$current_data" +%Y%m%d)
|
|
|
# 判断是否是节日,并拼接训练数据路径
|
|
|
if [ $(is_not_holidays $date_key) -eq 1 ]; then
|
|
|
+
|
|
|
+ # 将 date_key 放入数组
|
|
|
+ date_keys+=("$date_key")
|
|
|
+
|
|
|
if [[ -z ${train_data_path} ]]; then
|
|
|
train_data_path="${BUCKET_FEATURE_PATH}/${date_key}"
|
|
|
else
|
|
@@ -84,15 +88,25 @@ init() {
|
|
|
current_data=$(date -d "$current_data -1 day" +%Y%m%d)
|
|
|
done
|
|
|
|
|
|
+ last_index=$((${#date_keys[@]} - 1))
|
|
|
+ train_first_day=${date_keys[$last_index]}
|
|
|
+ train_last_day=${date_keys[0]}
|
|
|
|
|
|
- echo "train_data_path: ${train_data_path}"
|
|
|
- echo "predict_date_path: ${predict_date_path}"
|
|
|
- echo "predict_result_path: ${predict_result_path}"
|
|
|
- echo "model_save_path: ${model_save_path}"
|
|
|
- echo "feature_file: ${feature_file}"
|
|
|
- echo "model_name: ${model_name}"
|
|
|
- echo "model_local_path: ${model_local_path}"
|
|
|
- echo "model_oss_path: ${MODEL_OSS_PATH}"
|
|
|
+ model_save_path=${MODEL_PATH}/${model_name}_${train_first_day: -4}_${train_last_day: -4}
|
|
|
+ predict_date_path=${BUCKET_FEATURE_PATH}/${today_early_1}
|
|
|
+ new_model_predict_result_path=${PREDICT_RESULT_SAVE_PATH}/${today_early_1}_351_1000_${train_first_day: -4}_${train_last_day: -4}
|
|
|
+ online_model_predict_result_path=${PREDICT_RESULT_SAVE_PATH}/${today_early_1}_351_1000_${online_model_path: -9}
|
|
|
+
|
|
|
+ echo "init param train_data_path: ${train_data_path}"
|
|
|
+ echo "init param predict_date_path: ${predict_date_path}"
|
|
|
+ echo "init param new_model_predict_result_path: ${new_model_predict_result_path}"
|
|
|
+ echo "init param online_model_predict_result_path: ${online_model_predict_result_path}"
|
|
|
+ echo "init param model_save_path: ${model_save_path}"
|
|
|
+ echo "init param online_model_path: ${online_model_path}"
|
|
|
+ echo "init param feature_file: ${feature_file}"
|
|
|
+ echo "init param model_name: ${model_name}"
|
|
|
+ echo "init param model_local_path: ${model_local_path}"
|
|
|
+ echo "init param model_oss_path: ${MODEL_OSS_PATH}"
|
|
|
}
|
|
|
|
|
|
# 校验大数据任务是否执行完成
|
|
@@ -140,7 +154,7 @@ xgb_train() {
|
|
|
featureFile:20240703_ad_feature_name.txt \
|
|
|
trainPath:${train_data_path} \
|
|
|
testPath:${predict_date_path} \
|
|
|
- savePath:${predict_result_path} \
|
|
|
+ savePath:${new_model_predict_result_path} \
|
|
|
modelPath:${model_save_path} \
|
|
|
eta:0.01 gamma:0.0 max_depth:5 num_round:1000 num_worker:30 repartition:20
|
|
|
|
|
@@ -148,6 +162,36 @@ xgb_train() {
|
|
|
check_run_status $return_code $step_start_time "XGB模型训练任务"
|
|
|
}
|
|
|
|
|
|
+model_predict() {
|
|
|
+
|
|
|
+ # 线上模型评估最新的数据
|
|
|
+ # local step_start_time=$(date +%s)
|
|
|
+ # /opt/apps/SPARK3/spark-3.3.1-hadoop3.2-1.0.5/bin/spark-class org.apache.spark.deploy.SparkSubmit \
|
|
|
+ # --class com.tzld.piaoquan.recommend.model.pred_01_xgb_ad_hdfsfile_20240813 \
|
|
|
+ # --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 30 \
|
|
|
+ # --conf spark.yarn.executor.memoryoverhead=1024 \
|
|
|
+ # --conf spark.shuffle.service.enabled=true \
|
|
|
+ # --conf spark.shuffle.service.port=7337 \
|
|
|
+ # --conf spark.shuffle.consolidateFiles=true \
|
|
|
+ # --conf spark.shuffle.manager=sort \
|
|
|
+ # --conf spark.storage.memoryFraction=0.4 \
|
|
|
+ # --conf spark.shuffle.memoryFraction=0.5 \
|
|
|
+ # --conf spark.default.parallelism=200 \
|
|
|
+ # /root/zhangbo/recommend-model/recommend-model-produce/target/recommend-model-produce-jar-with-dependencies.jar \
|
|
|
+ # featureFile:20240703_ad_feature_name.txt \
|
|
|
+ # testPath:${predict_date_path} \
|
|
|
+ # savePath:${online_model_predict_result_path} \
|
|
|
+ # modelPath:${online_model_path}
|
|
|
+
|
|
|
+ # local return_code=$?
|
|
|
+ # check_run_status $return_code $step_start_time "线上模型评估${predict_date_path: -8}的数据"
|
|
|
+
|
|
|
+ # local mean_abs_diff=$(python ${sh_path}/model_predict_analyse.py -p ${online_model_predict_result_path} ${new_model_predict_result_path})
|
|
|
+ local p1="/dw/recommend/model/34_ad_predict_data/20241007_351_0927_1003_1000/"
|
|
|
+ local p2="/dw/recommend/model/34_ad_predict_data/20241007_351_0930_1006_1000/"
|
|
|
+ local mean_abs_diff=$(python ${sh_path}/model_predict_analyse.py -p ${p1} ${p2})
|
|
|
+}
|
|
|
+
|
|
|
model_upload_oss() {
|
|
|
cd ${model_local_path}
|
|
|
$hadoop fs -get ${model_save_path} ./${model_name}
|
|
@@ -169,21 +213,18 @@ model_upload_oss() {
|
|
|
|
|
|
}
|
|
|
|
|
|
-model_predict() {
|
|
|
- local python_return_code=$(python ${sh_path}/model_predict_analyse.py)
|
|
|
-}
|
|
|
-
|
|
|
# 主方法
|
|
|
main() {
|
|
|
- # init
|
|
|
+ init
|
|
|
|
|
|
# check_ad_hive
|
|
|
|
|
|
# xgb_train
|
|
|
|
|
|
+ model_predict
|
|
|
+
|
|
|
# model_upload_oss
|
|
|
|
|
|
- model_predict
|
|
|
}
|
|
|
|
|
|
|