瀏覽代碼

ros-模型更新

jch 1 天之前
父節點
當前提交
dac2124cf5

+ 46 - 0
rec_scripts/nor/20260123/nor_pipline.sh

@@ -0,0 +1,46 @@
+#!/bin/bash
+# "nor" model pipeline: sample -> train -> predict, one after another.
+#
+# Usage: nor_pipline.sh run [data_date]
+#   run        literal guard word; anything else aborts with status 1
+#   data_date  YYYYMMDD day to process (default: two days before today)
+#
+# The pipeline stops with status 1 as soon as one stage fails, so a later
+# stage never runs on the output of a failed one.
+
+if (( $# < 1 || $# > 2 )) || [[ "$1" != "run" ]]; then
+  echo "usage: $0 run [YYYYMMDD]" >&2
+  exit 1
+fi
+# Default day: two days before today.
+data_date=${2:-$(date -d "-2 day" +%Y%m%d)}
+
+# 0. env
+abs_path=$(cd "$(dirname "$0")" && pwd) || exit 1
+feature_file="${abs_path}/../feature/20250627_recsys_nor_name.txt"
+
+# Logs and runs one stage script, aborting the whole pipeline on failure.
+# bash is used explicitly because the stage scripts rely on bash-only
+# syntax such as (( )) and [[ ]].
+# Arguments: $1 - stage script path, $2... - its arguments
+run_stage() {
+  echo "$(date)" "bash $*"
+  if ! bash "$@"; then
+    echo "$(date)" "stage failed: $*" >&2
+    exit 1
+  fi
+}
+
+# 1. nor sample
+run_stage "${abs_path}/nor_sample.sh" "$data_date" "$data_date" "$feature_file"
+# NOTE(review): purpose of the 30s pause between stages is not visible here.
+sleep 30s
+
+# 2. nor train on the 7 days ending at data_date
+start_date=$(date -d "$data_date -6 day" +%Y%m%d) || exit 1
+end_date=$data_date
+run_stage "${abs_path}/nor_train.sh" "$start_date" "$end_date"
+sleep 30s
+
+# 3. nor predict
+run_stage "${abs_path}/nor_predict.sh" "$data_date" "$data_date"

+ 60 - 0
rec_scripts/nor/20260123/nor_predict.sh

@@ -0,0 +1,60 @@
+#!/bin/bash
+# Scores the "nor" XGBoost model on the sample data of a date range.
+# Usage: nor_predict.sh [start_date end_date]   (YYYYMMDD, both inclusive)
+# With any other argument count, both dates default to two days ago.
+# At most 22 consecutive days starting at start_date are read.
+set -x
+
+if (( $# == 2 )); then
+  start_date=$1
+  end_date=$2
+else
+  start_date=$(date -d "-2 day" +%Y%m%d)
+  end_date=$start_date
+fi
+
+# env
+export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
+export JAVA_HOME=/usr/lib/jvm/java-1.8.0
+
+# params
+FEATURE_FILE=20250627_recsys_nor_name.txt
+BASE_TRAIN_DATA_PATH=/dw/recommend/model/84_recsys_nor_train_data
+PREDICT_RESULT_PATH=/dw/recommend/model/84_recsys_nor_predict_data
+MODEL_SAVE_PATH=/dw/recommend/model/84_recsys_nor_model/model_xgb
+
+# Build a comma-separated list with one data directory per day in range.
+test_data_path=""
+for (( i = 0; i <= 21; i++ )); do
+  data_date=$(date -d "$start_date $i day" +%Y%m%d) || exit 1
+  if [ "$data_date" -le "$end_date" ]; then
+    test_data_path+="${test_data_path:+,}${BASE_TRAIN_DATA_PATH}/${data_date}"
+  fi
+done
+
+# An empty list (e.g. start_date after end_date) means no input: do not submit.
+if [[ -z "$test_data_path" ]]; then
+  echo "no dates between ${start_date} and ${end_date}" >&2
+  exit 1
+fi
+
+# Spark config keys are case-sensitive (memoryOverhead, spark.sql.debug.*).
+/opt/apps/SPARK3/spark-3.3.1-hadoop3.2-1.0.5/bin/spark-class org.apache.spark.deploy.SparkSubmit \
+--class com.tzld.piaoquan.recommend.model.pred_recsys_61_xgb_nor_hdfsfile_20241209 \
+--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 4 \
+--conf spark.executor.memoryOverhead=1024 \
+--conf spark.shuffle.service.enabled=true \
+--conf spark.shuffle.service.port=7337 \
+--conf spark.shuffle.consolidateFiles=true \
+--conf spark.shuffle.manager=sort \
+--conf spark.storage.memoryFraction=0.4 \
+--conf spark.shuffle.memoryFraction=0.5 \
+--conf spark.default.parallelism=200 \
+--conf spark.sql.debug.maxToStringFields=100 \
+/mnt/disk1/jch/recommend-model/recommend-model-produce/target/recommend-model-produce-jar-with-dependencies.jar \
+labelLogType:0 \
+labelLogBase:1.5 \
+"featureFile:${FEATURE_FILE}" \
+"testPath:${test_data_path}" \
+"savePath:${PREDICT_RESULT_PATH}" \
+"modelPath:${MODEL_SAVE_PATH}"

+ 43 - 0
rec_scripts/nor/20260123/nor_sample.sh

@@ -0,0 +1,43 @@
+#!/bin/bash
+# Runs the "nor" sample-generation Spark job for a date range.
+# Usage: nor_sample.sh start_date end_date feature_file
+#   The dates become beginStr/endStr; feature_file goes to --files/featureName.
+# Exits with status 1 unless exactly three arguments are given.
+set -x
+
+if (( $# != 3 )); then
+  echo "usage: $0 start_date end_date feature_file" >&2
+  exit 1
+fi
+start_date=$1
+end_date=$2
+feature_file=$3
+
+# env
+export SPARK_HOME=/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8
+export PATH=$SPARK_HOME/bin:$PATH
+export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
+export JAVA_HOME=/usr/lib/jvm/java-1.8.0
+
+# params
+sampleRate=-1
+readPath=/dw/recommend/model/84_origin_data/
+savePath=/dw/recommend/model/84_recsys_nor_train_data/
+
+echo "$(date)" "nor sample"
+/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_recsys_r_rate.makedata_recsys_86_nor_sample_20250627 \
+--master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
+--files "${feature_file}" \
+/mnt/disk1/jch/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-jar-with-dependencies.jar \
+"readPath:${readPath}" \
+"beginStr:${start_date}" "endStr:${end_date}" \
+whatApps:0,4,2,32,31,21,29,27,26,28,34,3,36,6,17,35 \
+whatLabel:return_n_uv_noself \
+whatPages:"详情后沉浸页,回流后沉浸页&内页feed,首页feed,详情页" \
+"fuSampleRate:${sampleRate}" \
+notUseBucket:1 \
+"featureName:${feature_file}" \
+featureBucket:20250303_recsys_nor_bucket.txt \
+repartition:8 \
+"savePath:${savePath}"

+ 62 - 0
rec_scripts/nor/20260123/nor_train.sh

@@ -0,0 +1,62 @@
+#!/bin/bash
+# Trains the "nor" XGBoost model on the sample data of a date range.
+# Usage: nor_train.sh [start_date end_date]   (YYYYMMDD, both inclusive)
+# Default range: 8 days ago .. 2 days ago; at most 22 days are read.
+set -x
+
+if (( $# == 2 )); then
+  start_date=$1
+  end_date=$2
+else
+  start_date=$(date -d "-8 day" +%Y%m%d)
+  end_date=$(date -d "-2 day" +%Y%m%d)
+fi
+
+# env
+export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
+export JAVA_HOME=/usr/lib/jvm/java-1.8.0
+
+# params
+FEATURE_FILE=20250627_recsys_nor_name.txt
+BASE_TRAIN_DATA_PATH=/dw/recommend/model/84_recsys_nor_train_data
+PREDICT_RESULT_PATH=/dw/recommend/model/84_recsys_nor_predict_data
+MODEL_SAVE_PATH=/dw/recommend/model/84_recsys_nor_model/model_xgb
+
+# Build a comma-separated list with one data directory per day in range.
+train_data_path=""
+for (( i = 0; i <= 21; i++ )); do
+  data_date=$(date -d "$start_date $i day" +%Y%m%d) || exit 1
+  if [ "$data_date" -le "$end_date" ]; then
+    train_data_path+="${train_data_path:+,}${BASE_TRAIN_DATA_PATH}/${data_date}"
+  fi
+done
+
+# An empty list (e.g. start_date after end_date) means no input: do not submit.
+if [[ -z "$train_data_path" ]]; then
+  echo "no dates between ${start_date} and ${end_date}" >&2
+  exit 1
+fi
+
+## ******* train *******
+workers=32
+# Spark config keys are case-sensitive, hence memoryOverhead with a capital O.
+/opt/apps/SPARK3/spark-3.3.1-hadoop3.2-1.0.5/bin/spark-class org.apache.spark.deploy.SparkSubmit \
+--class com.tzld.piaoquan.recommend.model.train_recsys_61_xgb_nor_20241209 \
+--master yarn --driver-memory 4G --executor-memory 10G --executor-cores 1 --num-executors ${workers} \
+--conf spark.executor.memoryOverhead=2048 \
+--conf spark.shuffle.service.enabled=true \
+--conf spark.shuffle.service.port=7337 \
+--conf spark.shuffle.consolidateFiles=true \
+--conf spark.shuffle.manager=sort \
+--conf spark.storage.memoryFraction=0.4 \
+--conf spark.shuffle.memoryFraction=0.5 \
+--conf spark.default.parallelism=200 \
+--conf spark.sql.debug.maxToStringFields=100 \
+/mnt/disk1/jch/recommend-model/recommend-model-produce/target/recommend-model-produce-jar-with-dependencies.jar \
+"featureFile:${FEATURE_FILE}" \
+"trainPath:${train_data_path}" \
+"savePath:${PREDICT_RESULT_PATH}" \
+"modelPath:${MODEL_SAVE_PATH}" \
+labelLogType:0 \
+labelLogBase:1.5 \
+eta:0.06 gamma:0.0 max_depth:5 num_round:1000 num_worker:${workers} repartition:20

+ 26 - 0
rec_scripts/nor/20260123/run.sh

@@ -0,0 +1,26 @@
+#!/bin/bash
+# Starts the nor pipeline in the background and appends its output to
+# <script dir>/../../logs/nor_<today>.log.
+# This script returns immediately; it does not wait for the pipeline.
+# Usage: run.sh run   (any other arguments exit with status 1)
+
+if (( $# != 1 )) || [[ "$1" != "run" ]]; then
+  echo "usage: $0 run" >&2
+  exit 1
+fi
+
+dd=$(date +%Y%m%d)
+
+# 0. abs path
+abs_path=$(cd "$(dirname "$0")" && pwd) || exit 1
+log_dir="${abs_path}/../../logs"
+log_file="${log_dir}/nor_${dd}.log"
+# The redirection below fails when the directory is missing, so create it.
+mkdir -p "$log_dir" || exit 1
+
+# 1. pipline
+pip_sh="${abs_path}/nor_pipline.sh"
+echo "$(date)" "bash $pip_sh run"
+# bash, not sh: the pipeline script uses bash-only (( )) and [[ ]].
+bash "$pip_sh" run >> "$log_file" 2>&1 &
+