|
@@ -111,38 +111,36 @@ init() {
|
|
|
echo "init param model_name: ${model_name}"
|
|
|
echo "init param model_local_path: ${model_local_path}"
|
|
|
echo "init param model_oss_path: ${MODEL_OSS_PATH}"
|
|
|
+
|
|
|
+ echo "当前Python环境安装的Python版本: $(python --version)"
|
|
|
+ echo "当前Python环境安装的三方包: $(python -m pip list)"
|
|
|
}
|
|
|
|
|
|
# Check whether the upstream big-data (Hive) job has finished.
#
# Runs `ad_utils.py --excute_program check_ad_origin_hive` for partition
# ${today_early_1} (hour 23) and treats the text it prints on stdout as a
# status code: zero means the data is ready. Any other output (including
# empty or non-numeric text) counts as "not ready"; the function then sleeps
# five minutes and tries again.
#
# After each failed attempt the wall clock is compared with the 05:30
# deadline. Once the deadline is reached the failure is logged, reported via
# ad_monitor_util.py, and the whole script is terminated with `exit 1`.
#
# Globals (read): sh_path, today_early_1, LOG_PREFIX, start_time
# Outputs:        progress / result lines on stdout
# Returns:        0 when the check passes; never returns on deadline (exit 1)
check_ad_hive() {
    local step_start_time
    step_start_time=$(date +%s)
    local max_hour=5
    local max_minute=30
    local elapsed=0
    # Accepts "0", "00", "+0", "-0", optionally surrounded by whitespace.
    local zero_re='^[[:space:]]*[-+]?0+[[:space:]]*$'
    local python_return_code now_hhmm msg

    while true; do
        # Declared separately above so `local` does not mask the command.
        python_return_code=$(python "${sh_path}/ad_utils.py" --excute_program check_ad_origin_hive --partition "${today_early_1}" --hh 23)

        elapsed=$(( $(date +%s) - step_start_time ))
        if [[ "$python_return_code" =~ $zero_re ]]; then
            break
        fi
        echo "Python程序返回非0值,等待五分钟后再次调用。"
        sleep 300

        # Read hour and minute in a single call so they cannot straddle an
        # hour boundary. `10#` forces base 10: date zero-pads its output and
        # bash arithmetic would otherwise reject "08"/"09" as invalid octal,
        # silently skipping the deadline check.
        # NOTE(review): this is a time-of-day comparison, so a run started
        # before midnight (e.g. 23:00) hits the deadline immediately after
        # its first failed attempt - confirm that is intended.
        now_hhmm=$(date +%H%M)
        if (( 10#$now_hhmm >= max_hour * 100 + max_minute )); then
            msg="大数据数据生产校验失败, 分区: ${today_early_1}"
            echo -e "$LOG_PREFIX -- 大数据数据生产校验 -- ${msg}: 耗时 $elapsed"
            # NOTE(review): uses the absolute anaconda interpreter while the
            # check above uses `python` from PATH - confirm which is wanted.
            /root/anaconda3/bin/python "${sh_path}/ad_monitor_util.py" --level error --msg "$msg" --start "$start_time" --elapsed "$elapsed"
            exit 1
        fi
    done
    echo "$LOG_PREFIX -- 大数据数据生产校验 -- 大数据数据生产校验通过: 耗时 $elapsed"
}
|
|
|
|
|
|
make_origin_data() {
|
|
@@ -255,7 +253,7 @@ model_predict() {
|
|
|
local return_code=$?
|
|
|
check_run_status $return_code $step_start_time "线上模型评估${predict_date_path: -8}的数据"
|
|
|
|
|
|
- local mean_abs_diff=$(/root/anaconda3/bin/python ${sh_path}/model_predict_analyse.py -p ${online_model_predict_result_path} ${new_model_predict_result_path})
|
|
|
+ local mean_abs_diff=$(python ${sh_path}/model_predict_analyse.py -p ${online_model_predict_result_path} ${new_model_predict_result_path})
|
|
|
if (( $(echo "${mean_abs_diff} > 0.000400" | bc -l ) ));then
|
|
|
check_run_status 1 $step_start_time "线上模型评估${predict_date_path: -8}的数据,绝对误差大于0.000400,请检查"
|
|
|
echo "线上模型评估${predict_date_path: -8}的数据,绝对误差大于0.000400,请检查"
|
|
@@ -297,15 +295,15 @@ main() {
|
|
|
|
|
|
check_ad_hive
|
|
|
|
|
|
- # make_origin_data
|
|
|
+ make_origin_data
|
|
|
|
|
|
- # make_bucket_feature
|
|
|
+ make_bucket_feature
|
|
|
|
|
|
- # xgb_train
|
|
|
+ xgb_train
|
|
|
|
|
|
- # model_predict
|
|
|
+ model_predict
|
|
|
|
|
|
- # model_upload_oss
|
|
|
+ model_upload_oss
|
|
|
|
|
|
}
|
|
|
|