Forráskód Böngészése

feat:添加获取yarn任务列表脚本

zhaohaipeng 4 hónapja
szülő
commit
c2fbafb942

+ 37 - 0
ad/20_yarn_app_list.sh

@@ -0,0 +1,37 @@
+#!/bin/sh
+set -x
+
+# 定义合法的任务状态
+VALID_STATES="ALL,NEW,NEW_SAVING,SUBMITTED,ACCEPTED,RUNNING,FINISHED,FAILED,KILLED"
+
+# 检查是否传入参数
+if [ $# -lt 1 ]; then
+    echo "Usage: $0 <appStates>"
+    echo "Example: $0 FINISHED"
+    echo "Valid states: $VALID_STATES"
+    exit 1
+fi
+
+# 获取任务状态作为参数
+APP_STATES=$1
+
+# 检查输入状态是否合法
+if ! echo "$VALID_STATES" | grep -qw "$APP_STATES"; then
+    echo "Error: Invalid appStates '$APP_STATES'."
+    echo "Valid states: $VALID_STATES"
+    exit 1
+fi
+
+# 获取指定状态的任务列表
+yarn app -list -appStates "$APP_STATES" | awk '{print $1}' | while read -r app_id; do
+    # 获取任务详情
+    details=$(yarn application -status "$app_id" | grep -E "Application-Id|Application-Name|Start-Time|Finish-Time")
+    
+    # 提取信息
+    app_id=$(echo "$details" | grep "Application-Id" | awk -F ": " '{print $2}')
+    app_name=$(echo "$details" | grep "Application-Name" | awk -F ": " '{print $2}')
+    start_time=$(echo "$details" | grep "Start-Time" | awk -F ": " '{print $2}' | xargs -I {} date -d @{} +'%Y-%m-%d %H:%M:%S')
+    finish_time=$(echo "$details" | grep "Finish-Time" | awk -F ": " '{print $2}' | xargs -I {} date -d @{} +'%Y-%m-%d %H:%M:%S')
+
+    # 显示任务信息
+    echo -e "Application-Id: $app_id\tApplication-Name: $

+ 0 - 60
ad/22_ad_model_predict_auc.sh

@@ -1,60 +0,0 @@
-#!/bin/sh
-
-# 训练新模型,并使用后面的数据计算AUC,评估模型效果
-
-set -x
-
-begin_date=$1
-end_date=$2
-model_name=$3
-predict_dim=$4
-
-PROJECT_HOME=/root/zhaohp/20240723
-HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
-HDFS_TRAIN_DATE_PATH=/dw/recommend/model/33_ad_train_data_v4_idn1
-MODEL_PATH=${PROJECT_HOME}/model
-PREDICT_PATH=${PROJECT_HOME}/predict
-
-FM_TRAIN=/root/sunmingze/alphaFM/bin/fm_train
-FM_PREDICT=/root/sunmingze/alphaFM/bin/fm_predict
-
-train_date=$begin_date
-
-# 计算模型的AUC,从训练日期的后一天到参数的end_date
-predict_auc() {
-    echo -e "\t==================== 开始预测 $train_date 模型 ===================="
-
-    predict_date=$(date -d "$train_date +1 day" +%Y%m%d)
-    predict_end_date=$(date -d "$end_date +1 day" +%Y%m%d)
-    while [ "$predict_date" != "$predict_end_date" ]; do
-
-        $HADOOP fs -text ${HDFS_TRAIN_DATE_PATH}/${predict_date}/* | ${FM_PREDICT} -m ${MODEL_PATH}/${model_name}_${train_date}.txt -dim ${predict_dim} -core 8 -out ${PREDICT_PATH}/${model_name}_${train_date}.txt
-        auc=`cat ${PREDICT_PATH}/${model_name}_${train_date}.txt | /root/sunmingze/AUC/AUC`
-
-        echo "模型训练日期: ${train_date}, 模型预测日期: ${predict_date}, AUC: ${auc}, 模型路径: ${MODEL_PATH}/${model_name}_${train_date}.txt"
-
-        predict_date=$(date -d "$predict_date +1 day" +%Y%m%d)
-
-    done
-
-    echo -e "\n\t==================== 预测 $train_date 模型结束 ===================="
-
-}
-main() {
-
-    # 增量训练模型
-    while [ "$train_date" != "$end_date" ]; do
-        echo "==================== 开始训练 $train_date 模型 ===================="
-
-        predict_auc
-
-        echo -e "==================== 训练 $train_date 模型结束 ==================== \n\n\n\n\n\n"
-        train_date=$(date -d "$train_date +1 day" +%Y%m%d)
-    done
-
-}
-
-main
-
-
-# nohup ./22_ad_model_predict_auc.sh 20240712 20240717 model_bkb8_v4_idn1 8  > logs/22_ad_model_predict_auc.log 2>&1 &

+ 0 - 29
ad/23_ad_model_batch_calc_cid_score_avg.sh

@@ -1,29 +0,0 @@
-#!/bin/sh
-
-# 计算模型对某天,某个CID的打分情况,输出平均值
-
-set -x
-
-cids=$1
-model=$2
-hdfs_path=$3
-bias=$4
-
-MODEL_PATH=/root/zhaohp/recommend-emr-dataprocess/model/ad
-PREDICT_PATH=/root/zhaohp/recommend-emr-dataprocess/predict/ad
-HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
-FM_HOME=/root/sunmingze/alphaFM
-
-# 将cids中的逗号分隔列表拆分为数组
-IFS=',' read -ra cid_array <<< "$cids"
-
-for cid in "${cid_array[@]}"; do
-    # 对每个CID执行打分计算并输出平均值
-    $HADOOP fs -text ${hdfs_path}/* | grep "cid_${cid}" | ${FM_HOME}/bin/fm_predict -m ${MODEL_PATH}/${model}.txt -dim ${bias} -core 8 -out ${PREDICT_PATH}/${model}_${cid}.txt
-
-    score_avg=`awk '{ sum += $2; count++ } END { if (count > 0) print sum / count }' ${PREDICT_PATH}/${model}_${cid}.txt`
-
-    echo -e "CID- ${cid} -平均分计算结果: ${score_avg} \n\t模型: ${MODEL_PATH}/${model} \n\tHDFS数据路径: ${hdfs_path} \n\t"
-done
-
-# nohup ./ad/23_ad_model_batch_calc_cid_score_avg.sh 3024,2966,2670,3163,3595,3594,3364,3365,3593,3363,3180,1910,2660,3478,3431,3772,3060,3178,3056,3771,3208,3041,2910,3690,1626,3318,3357,3628,3766,3770,3763,3769,3768,3541,3534,2806,3755,3760,3319,3758,3746,3759,3747,3754,3767,3745,3756,3437,3608,3527,3691,3197,3361,3362,3212,3344,3343,3346,3345,3612,3540,3526,3611,3761,3617,3762,3618,3616,3623,3765,3624,3764,3198,3542,3353,2374,3200 model_bkb8_v55_20240804 /dw/recommend/model/33_ad_train_data_v4/20240806 8 > logs/model_bkb8_v55_20240804_cid_06_12.log 2>&1 &