#!/bin/bash
# Ad-data backfill script: change today_early_1 below to backfill a single
# day's data.
#
# The interpreter is bash rather than sh because this file relies on
# `source`, `local`, `(( ))` and `echo -e`, none of which POSIX sh guarantees.
set -x

export PATH="${SPARK_HOME}/bin:${PATH}"
export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
export JAVA_HOME=/usr/lib/jvm/java-1.8.0

# Directory containing this script; sibling helper scripts are resolved
# relative to it.
sh_path=$(cd "$(dirname "$0")" && pwd)

source "${sh_path}/00_common.sh"
source /root/anaconda3/bin/activate py37

# Global constants.
# NOTE(review): HADOOP, TRAIN_PATH, BUCKET_FEATURE_PATH and TABLE are not
# referenced in the visible part of this file; presumably they are consumed by
# the sourced helper scripts -- confirm before removing or renaming.
LOG_PREFIX=广告模型训练任务
HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
TRAIN_PATH=/dw/recommend/model/31_ad_sample_data_v4
BUCKET_FEATURE_PATH=/dw/recommend/model/33_ad_train_data_v4
TABLE=alg_recsys_ad_sample_all

# Job start time, in epoch seconds.
start_time=$(date +%s)

# Partition to process: yesterday, formatted as YYYYMMDD.
today_early_1="$(date -d '1 days ago' +%Y%m%d)"
#######################################
# Check the exit status of a step, log the outcome, and abort on failure.
# Globals:
#   LOG_PREFIX  (read) prefix of every log line
#   start_time  (read) job start time, epoch seconds
#   sh_path     (read) directory holding ad_monitor_util.py
#   top10_msg   (read) forwarded to the monitor; not defined in the visible
#               part of this file -- presumably set by a sourced script,
#               TODO confirm
# Arguments:
#   $1 - exit status of the step that just ran
#   $2 - start time of that step, epoch seconds
#   $3 - step name used in the log line
#   $4 - message passed to ad_monitor_util.py when the step failed
# Outputs:
#   One log line on stdout.
# Returns:
#   0 when the status is 0; otherwise calls the monitor and exits with 1.
#######################################
check_run_status() {
  local status=${1:-}
  local step_start_time=${2:-}
  local step_name=${3:-}
  local msg=${4:-}
  local step_end_time step_elapsed elapsed

  step_end_time=$(date +%s)
  # Bare names inside $(( )) evaluate to 0 when unset instead of raising a
  # syntax error.
  step_elapsed=$((step_end_time - step_start_time))

  # An empty or non-numeric status would make the numeric test below error
  # out and take the success branch; treat such a value as a failure.
  case "${status}" in
    '' | *[!0-9]*) status=1 ;;
  esac

  if [ "${status}" -ne 0 ]; then
    echo "${LOG_PREFIX} -- ${step_name}失败: 耗时 ${step_elapsed}"
    elapsed=$((step_end_time - start_time))
    /root/anaconda3/bin/python "${sh_path}/ad_monitor_util.py" --level error --msg "${msg}" --start "${start_time}" --elapsed "${elapsed}" --top10 "${top10_msg}"
    exit 1
  else
    echo "${LOG_PREFIX} -- ${step_name}成功: 耗时 ${step_elapsed}"
  fi
}
#######################################
# Wait until the upstream big-data job has produced its data.
#
# Polls ad_utils.py (check_ad_origin_hive, partition today_early_1, hour 23)
# every five minutes. The value tested is what the program prints on stdout,
# not its exit status: "0" means ready.
# Gives up once the wall clock reaches 05:30.
# NOTE(review): the deadline is a time of day, so a run started after 05:30
# gives up after the first failed check -- confirm this is intended for a
# backfill script.
# Globals:
#   sh_path, today_early_1, LOG_PREFIX, start_time (read)
# Outputs:
#   Progress and result lines on stdout.
# Returns:
#   0 when the check passes; on timeout calls the monitor and exits with 1.
#######################################
check_ad_hive() {
  local step_start_time
  step_start_time=$(date +%s)
  local -r max_hour=5
  local -r max_minute=30
  local elapsed=0
  local python_return_code current_hour current_minute msg

  while true; do
    python_return_code=$(python "${sh_path}/ad_utils.py" --excute_program check_ad_origin_hive --partition "${today_early_1}" --hh 23)
    elapsed=$(($(date +%s) - step_start_time))
    if [ "${python_return_code}" -eq 0 ]; then
      break
    fi

    echo "Python程序返回非0值,等待五分钟后再次调用。"
    sleep 300

    # date prints zero-padded values; force base 10 so that "08" and "09"
    # are not rejected as invalid octal numbers by the arithmetic below.
    current_hour=$((10#$(date +%H)))
    current_minute=$((10#$(date +%M)))
    if ((current_hour > max_hour || (current_hour == max_hour && current_minute >= max_minute))); then
      msg="大数据数据生产校验失败, 分区: ${today_early_1}"
      # Refresh the elapsed time so it includes the sleep that just finished.
      elapsed=$(($(date +%s) - step_start_time))
      echo -e "${LOG_PREFIX} -- 大数据数据生产校验 -- ${msg}: 耗时 ${elapsed}"
      /root/anaconda3/bin/python "${sh_path}/ad_monitor_util.py" --level error --msg "${msg}" --start "${start_time}" --elapsed "${elapsed}"
      exit 1
    fi
  done

  echo "${LOG_PREFIX} -- 大数据数据生产校验 -- 大数据数据生产校验通过: 耗时 $elapsed"
}
#######################################
# Run make_origin_data from 25_xgb_make_data_origin_bucket.sh.
#
# The helper is sourced inside a subshell, so nothing it defines or changes
# leaks into this script. For the same reason an `exit` inside the helper only
# ends the subshell; callers must check this function's return status.
# Globals:
#   sh_path, LOG_PREFIX (read)
# Returns:
#   Status of make_origin_data, or 1 when the helper script cannot be read.
#######################################
origin_data() {
  (
    helper_script="${sh_path}/25_xgb_make_data_origin_bucket.sh"
    if [ ! -r "${helper_script}" ]; then
      echo "${LOG_PREFIX} -- cannot read ${helper_script}" >&2
      exit 1
    fi
    # shellcheck source=/dev/null
    source "${helper_script}"
    make_origin_data
  )
}
#######################################
# Run make_bucket_feature from 25_xgb_make_data_origin_bucket.sh.
#
# The helper is sourced inside a subshell, so nothing it defines or changes
# leaks into this script. For the same reason an `exit` inside the helper only
# ends the subshell; callers must check this function's return status.
# Globals:
#   sh_path, LOG_PREFIX (read)
# Returns:
#   Status of make_bucket_feature, or 1 when the helper script cannot be read.
#######################################
bucket_feature() {
  (
    helper_script="${sh_path}/25_xgb_make_data_origin_bucket.sh"
    if [ ! -r "${helper_script}" ]; then
      echo "${LOG_PREFIX} -- cannot read ${helper_script}" >&2
      exit 1
    fi
    # shellcheck source=/dev/null
    source "${helper_script}"
    make_bucket_feature
  )
}
#######################################
# Main entry point: wait for the upstream data, then build the origin data
# and the bucketed features, in that order.
#
# Stops at the first step that fails. origin_data and bucket_feature run
# their work in subshells, so an `exit` inside them cannot end this script;
# their return status is checked here instead.
# Returns:
#   0 when every step succeeded, otherwise the status of the failing step.
#######################################
main() {
  check_ad_hive || return
  origin_data || return
  bucket_feature
}
# Run the pipeline. This is the last command, so the script's exit status is
# the status returned by main.
main "$@"