|
@@ -62,6 +62,7 @@ check_run_status() {
|
|
|
local status=$1
|
|
|
local step_start_time=$2
|
|
|
local step_name=$3
|
|
|
+ local msg=$4
|
|
|
|
|
|
local step_end_time=$(date +%s)
|
|
|
local step_elapsed=$(($step_end_time - $step_start_time))
|
|
@@ -214,7 +215,7 @@ xgb_train() {
|
|
|
eta:0.01 gamma:0.0 max_depth:5 num_round:1000 num_worker:30 repartition:20
|
|
|
|
|
|
local return_code=$?
|
|
|
- check_run_status $return_code $step_start_time "XGB模型训练任务"
|
|
|
+ check_run_status $return_code $step_start_time "XGB模型训练任务" "XGB模型训练失败"
|
|
|
}
|
|
|
|
|
|
calc_model_predict() {
|
|
@@ -244,14 +245,14 @@ calc_model_predict() {
|
|
|
done < "${predict_analyse_file_path}"
|
|
|
|
|
|
local return_code=$?
|
|
|
- check_run_status $return_code $step_start_time "计算Top10差异"
|
|
|
+ check_run_status $return_code $step_start_time "计算Top10差异" "计算Top10差异异常"
|
|
|
|
|
|
old_incr_rate_avg=$( echo "scale=6; ${old_total_diff} / ${count}" | bc -l )
|
|
|
- check_run_status $? $step_start_time "计算老模型Top10差异"
|
|
|
+ check_run_status $? $step_start_time "计算老模型Top10差异" "计算老模型Top10差异异常"
|
|
|
|
|
|
|
|
|
new_incr_rate_avg=$( echo "scale=6; ${new_total_diff} / ${count}" | bc -l )
|
|
|
- check_run_status $? $step_start_time "计算新模型Top10差异"
|
|
|
+ check_run_status $? $step_start_time "计算新模型Top10差异" "计算新模型Top10差异异常"
|
|
|
|
|
|
echo "老模型Top10差异平均值: ${old_incr_rate_avg}"
|
|
|
echo "新模型Top10差异平均值: ${new_incr_rate_avg}"
|
|
@@ -283,17 +284,17 @@ model_predict() {
|
|
|
modelPath:${online_model_path}
|
|
|
|
|
|
local return_code=$?
|
|
|
- check_run_status $return_code $step_start_time "线上模型评估${predict_date_path: -8}的数据"
|
|
|
+ check_run_status $return_code $step_start_time "线上模型评估${predict_date_path: -8}的数据" "线上模型评估${predict_date_path: -8}的数据失败"
|
|
|
|
|
|
# 结果分析
|
|
|
local python_return_code=$(python ${sh_path}/model_predict_analyse.py -p ${online_model_predict_result_path} ${new_model_predict_result_path} -f ${predict_analyse_file_path})
|
|
|
- check_run_status $python_return_code $step_start_time "线上模型评估${predict_date_path: -8}的数据"
|
|
|
+ check_run_status $python_return_code $step_start_time "分析线上模型评估${predict_date_path: -8}的数据" "分析线上模型评估${predict_date_path: -8}的数据失败"
|
|
|
|
|
|
calc_model_predict
|
|
|
|
|
|
if (( $(echo "${new_incr_rate_avg} > 0.100000" | bc -l ) ));then
|
|
|
- check_run_status 1 $step_start_time "线上模型评估${predict_date_path: -8}的数据,绝对误差大于0.1,请检查"
|
|
|
echo "线上模型评估${predict_date_path: -8}的数据,绝对误差大于0.1,请检查"
|
|
|
+ check_run_status 1 $step_start_time "${predict_date_path: -8}的数据,绝对误差大于0.1" "线上模型评估${predict_date_path: -8}的数据,绝对误差大于0.1,请检查"
|
|
|
exit 1
|
|
|
fi
|
|
|
}
|
|
@@ -307,7 +308,7 @@ model_upload_oss() {
|
|
|
${HADOOP} fs -get ${model_save_path} ${model_name}
|
|
|
if [ ! -d ${model_name} ]; then
|
|
|
echo "从HDFS下载模型失败"
|
|
|
- check_run_status 1 $step_start_time "HDFS下载模型任务"
|
|
|
+ check_run_status 1 $step_start_time "HDFS下载模型任务" "HDFS下载模型失败"
|
|
|
exit 1
|
|
|
fi
|
|
|
|
|
@@ -319,7 +320,7 @@ model_upload_oss() {
|
|
|
|
|
|
${HADOOP} fs -put ${model_name}.tar.gz ${MODEL_OSS_PATH}
|
|
|
local return_code=$?
|
|
|
- check_run_status $return_code $step_start_time "模型上传OSS任务"
|
|
|
+ check_run_status $return_code $step_start_time "模型上传OSS任务" "模型上传OSS失败"
|
|
|
|
|
|
|
|
|
echo ${model_save_path} > ${model_path_file}
|
|
@@ -329,11 +330,10 @@ model_upload_oss() {
|
|
|
)
|
|
|
|
|
|
local return_code=$?
|
|
|
- check_run_status $return_code $step_start_time "模型上传OSS任务"
|
|
|
+ check_run_status $return_code $step_start_time "模型上传OSS任务" "模型上传OSS失败"
|
|
|
|
|
|
local step_end_time=$(date +%s)
|
|
|
local elapsed=$(($step_end_time - $start_time))
|
|
|
-
|
|
|
echo -e "$LOG_PREFIX -- 模型更新完成 -- 模型更新成功: 耗时 $elapsed"
|
|
|
|
|
|
send_success_upload_msg
|