#!/bin/bash
# NOTE: the script relies on bash-only features (`source`, `(( ))`, `local`),
# so it must be interpreted by bash rather than a generic POSIX sh.
set -x

# Ad data backfill script. To backfill a single day, change the value
# assigned to `today_early_1` below.

export PATH=$SPARK_HOME/bin:$PATH
export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
export JAVA_HOME=/usr/lib/jvm/java-1.8.0

# Absolute directory containing this script; sibling scripts are resolved
# relative to it.
sh_path=$(cd "$(dirname "$0")"; pwd)
source "${sh_path}/00_common.sh"

source /root/anaconda3/bin/activate py37

# Global constants. Several are not referenced in this file; presumably they
# are consumed by the scripts sourced further down (verify before removing).
LOG_PREFIX=广告模型训练任务
HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
TRAIN_PATH=/dw/recommend/model/31_ad_sample_data_v4
BUCKET_FEATURE_PATH=/dw/recommend/model/33_ad_train_data_v4
TABLE=alg_recsys_ad_sample_all

# Job start time (epoch seconds).
start_time=$(date +%s)
# Partition to process: yesterday, formatted as YYYYMMDD.
today_early_1="$(date -d '1 days ago' +%Y%m%d)"

#######################################
# Check the exit status of a finished step; on failure, send an error alert
# through ad_monitor_util.py and terminate the current shell with status 1.
# Not called in this file; presumably used by the sourced step scripts.
# Globals:
#   LOG_PREFIX, start_time, sh_path (read)
#   top10_msg (read; not set in this file, so it may expand to empty)
# Arguments:
#   $1 - exit status of the step
#   $2 - step start time (epoch seconds)
#   $3 - step name used in log lines
#   $4 - message sent with the alert on failure
#######################################
check_run_status() {
    local status=$1
    local step_start_time=$2
    local step_name=$3
    local msg=$4

    local step_end_time
    step_end_time=$(date +%s)
    local step_elapsed=$((step_end_time - step_start_time))

    if [ "${status}" -ne 0 ]; then
        echo "${LOG_PREFIX} -- ${step_name}失败: 耗时 ${step_elapsed}"
        # The alert reports time elapsed since the whole job started,
        # not just the duration of this step.
        local elapsed=$((step_end_time - start_time))
        /root/anaconda3/bin/python "${sh_path}/ad_monitor_util.py" --level error --msg "${msg}" --start "${start_time}" --elapsed "${elapsed}" --top10 "${top10_msg}"
        exit 1
    else
        echo "${LOG_PREFIX} -- ${step_name}成功: 耗时 ${step_elapsed}"
    fi
}

#######################################
# Wait until the upstream data for partition ${today_early_1} (hour 23)
# passes the check performed by ad_utils.py, polling every five minutes.
# If a poll fails and the wall clock is at or past 05:30, send an error
# alert and exit with status 1.
# Globals:
#   LOG_PREFIX, sh_path, start_time, today_early_1 (read)
#######################################
check_ad_hive() {
    local step_start_time
    step_start_time=$(date +%s)
    local max_hour=05
    local max_minute=30
    local elapsed=0
    local python_return_code current_hour current_minute msg

    while true; do
        # NOTE(review): despite its name, this variable holds whatever the
        # Python script writes to stdout, not its process exit status.
        python_return_code=$(python "${sh_path}/ad_utils.py" --excute_program check_ad_origin_hive --partition "${today_early_1}" --hh 23)

        elapsed=$(($(date +%s) - step_start_time))
        if [ "${python_return_code}" -eq 0 ]; then
            break
        fi
        echo "Python程序返回非0值,等待五分钟后再次调用。"
        sleep 300

        current_hour=$(date +%H)
        current_minute=$(date +%M)
        # `date` zero-pads its output; force base 10 so that values such as
        # "08" and "09" are not rejected as invalid octal numbers.
        if (( 10#${current_hour} > 10#${max_hour} || ( 10#${current_hour} == 10#${max_hour} && 10#${current_minute} >= 10#${max_minute} ) )); then
            msg="大数据数据生产校验失败, 分区: ${today_early_1}"
            echo -e "${LOG_PREFIX} -- 大数据数据生产校验 -- ${msg}: 耗时 ${elapsed}"
            /root/anaconda3/bin/python "${sh_path}/ad_monitor_util.py" --level error --msg "${msg}" --start "${start_time}" --elapsed "${elapsed}"
            exit 1
        fi
    done
    echo "${LOG_PREFIX} -- 大数据数据生产校验 -- 大数据数据生产校验通过: 耗时 $elapsed"
}

# Run make_origin_data from 25_xgb_make_data_origin_bucket.sh inside a
# subshell, so anything the sourced script defines or changes (including an
# `exit`) stays confined to that subshell.
origin_data() {
    (
        source "${sh_path}/25_xgb_make_data_origin_bucket.sh"
        make_origin_data
    )
}

# Run make_bucket_feature from 25_xgb_make_data_origin_bucket.sh inside a
# subshell, for the same isolation as origin_data.
bucket_feature() {
    (
        source "${sh_path}/25_xgb_make_data_origin_bucket.sh"
        make_bucket_feature
    )
}

# Entry point: wait for upstream data, then build the origin data and the
# bucketed features.
# NOTE(review): the exit status of origin_data is not checked, so
# bucket_feature runs even when origin_data fails. Confirm whether that is
# intended before changing it.
main() {
    check_ad_hive

    origin_data

    bucket_feature
}

main