瀏覽代碼

feat:添加XGB训练脚本,不上传

zhaohaipeng 10 月之前
父節點
當前提交
17ee55a1db
共有 1 個文件被更改,包括 4 次插入和 100 次删除
  1. 4 100
      ad/03_xgb_train.sh

+ 4 - 100
ad/03_xgb_train.sh

@@ -4,6 +4,8 @@ set -x
 export PATH=$SPARK_HOME/bin:$PATH
 export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
 export JAVA_HOME=/usr/lib/jvm/java-1.8.0
+export PREDICT_CACHE_PATH=/root/zhaohp/XGB/predict_cache/
+export SEGMENT_BASE_PATH=/dw/recommend/model/36_model_attachment/score_calibration_file
 
 sh_path=$(cd $(dirname $0); pwd)
 source ${sh_path}/00_common.sh
@@ -73,50 +75,6 @@ declare -A real_score_map
 declare -A old_score_map
 declare -A new_score_map
 
-# 校验命令的退出码
-check_run_status() {
-    local status=$1
-    local step_start_time=$2
-    local step_name=$3
-    local msg=$4
-
-    local step_end_time=$(date +%s)
-    local step_elapsed=$(($step_end_time - $step_start_time))
-
-    if [[ -n "${old_auc}" && "${old_auc}" != "0" ]]; then
-      msg+="\n\t - 老模型AUC: ${old_auc}"
-    fi
-    if [[ -n "${new_auc}" && "${new_auc}" != "0" ]]; then
-      msg+="\n\t - 新模型AUC: ${new_auc}"
-    fi
-
-
-    if [ ${status} -ne 0 ]; then
-        echo "${LOG_PREFIX} -- ${step_name}失败: 耗时 ${step_elapsed}"
-        local elapsed=$(($step_end_time - $start_time))
-        /root/anaconda3/bin/python ${sh_path}/ad_monitor_util.py --level error --msg "${msg}" --start "${start_time}" --elapsed "${elapsed}" --top10 "${top10_msg}"
-        exit 1
-    else
-        echo "${LOG_PREFIX} -- ${step_name}成功: 耗时 ${step_elapsed}"
-    fi
-}
-
-send_success_upload_msg(){ 
-  # 发送更新成功通知
-  local msg=" 广告模型文件更新完成"
-  msg+="\n\t - 老模型AUC: ${old_auc}"
-  msg+="\n\t - 新模型AUC: ${new_auc}"
-  msg+="\n\t - 老模型Top10差异平均值: ${old_incr_rate_avg}"
-  msg+="\n\t - 新模型Top10差异平均值: ${new_incr_rate_avg}"
-  msg+="\n\t - 模型在HDFS中的路径: ${model_save_path}"
-  msg+="\n\t - 模型上传OSS中的路径: ${MODEL_OSS_PATH}/${model_name}.tar.gz"
-
-  local step_end_time=$(date +%s)
-  local elapsed=$((${step_end_time} - ${start_time}))
-
-  /root/anaconda3/bin/python ${sh_path}/ad_monitor_util.py --level info --msg "${msg}" --start "${start_time}" --elapsed "${elapsed}" --top10 "${top10_msg}"
-}
-
 init() {
   
   declare -a date_keys=()
@@ -172,47 +130,6 @@ init() {
   echo "当前Python环境安装的三方包: $(python -m pip list)"
 }
 
-# 校验大数据任务是否执行完成
-check_ad_hive() {
-  local step_start_time=$(date +%s)
-  local max_hour=05
-  local max_minute=30
-  local elapsed=0
-  while true; do
-      local python_return_code=$(python ${sh_path}/ad_utils.py --excute_program check_ad_origin_hive --partition ${today_early_1} --hh 23)
-
-      elapsed=$(($(date +%s) - ${step_start_time}))
-      if [ "${python_return_code}" -eq 0 ]; then
-          break
-      fi
-      echo "Python程序返回非0值,等待五分钟后再次调用。"
-      sleep 300
-      local current_hour=$(date +%H)
-      local current_minute=$(date +%M)
-      if (( ${current_hour} > ${max_hour} || ( ${current_hour} == ${max_hour} && ${current_minute} >= ${max_minute} ) )); then
-          local msg="大数据数据生产校验失败, 分区: ${today_early_1}"
-          echo -e "${LOG_PREFIX} -- 大数据数据生产校验 -- ${msg}: 耗时 ${elapsed}"
-          /root/anaconda3/bin/python ${sh_path}/ad_monitor_util.py --level error --msg "${msg}" --start "${start_time}" --elapsed "${elapsed}"
-          exit 1
-      fi
-  done
-  echo "${LOG_PREFIX} -- 大数据数据生产校验 -- 大数据数据生产校验通过: 耗时 ${elapsed}"
-}
-
-origin_data() {
-  (
-    source ${sh_path}/25_xgb_make_data_origin_bucket.sh
-    make_origin_data
-  )
-}
-
-bucket_feature() {
-  (
-    source ${sh_path}/25_xgb_make_data_origin_bucket.sh
-    make_bucket_feature
-  )
-}
-
 xgb_train() {
   local step_start_time=$(date +%s)
 
@@ -349,23 +266,10 @@ model_predict() {
 # 主方法
 main() {
   init
-
-  check_ad_hive
-
-  origin_data
-
-  bucket_feature
-
-  if [ "${current_day_of_week}" -eq 1 ] || [ "${current_day_of_week}" -eq 3 ] || [ "${current_day_of_week}" -eq 5 ]; then
-    echo "当前是周一,周三或周五,开始训练并更新模型"
     
-    xgb_train
-
-    model_predict
-  else
-    echo "当前是周一,周三或周五,不更新模型"
-  fi 
+  xgb_train
 
+  model_predict
 }