#!/bin/bash
#
# train_xgb_model.sh
#
# Submits the Spark job
# com.tzld.piaoquan.recommend.model.train_profile_gender_xgb_20251114
# to YARN, pointing it at a comma-separated list of training-data directories
# and at the model output path defined below.
#
# Usage:
#   train_xgb_model.sh <feature_file> <year>
#
# Arguments:
#   feature_file  Shipped to the job with --files and also passed to the job
#                 as featureFile:<feature_file>.
#   year          Prefix of each training-data directory name; the directories
#                 used are ${BASE_TRAIN_DATA_PATH}/<year>_<suffix>.
#
# Exit status:
#   255  wrong number of arguments or an empty argument (this is the status
#        bash reports for the `exit -1` this script historically used).
#   else the exit status of the Spark submit command.

set -euo pipefail

# Prints the usage line to stderr and terminates with status 255.
usage() {
  printf 'Usage: %s <feature_file> <year>\n' "${0##*/}" >&2
  exit 255
}

if (( $# != 2 )); then
  usage
fi

feature_file=$1
year=$2

# An empty value would silently produce a malformed Spark command line.
if [[ -z "${feature_file}" || -z "${year}" ]]; then
  usage
fi

# env
export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
export JAVA_HOME=/usr/lib/jvm/java-1.8.0

# params
readonly workers=16
readonly minCnt=10
readonly BASE_TRAIN_DATA_PATH=/dw/recommend/model/user_profile/gender/sample/train
readonly MODEL_SAVE_PATH=/dw/recommend/model/user_profile/gender/model/model_xgb
readonly SPARK_CLASS=/opt/apps/SPARK3/spark-3.3.1-hadoop3.2-1.0.5/bin/spark-class
readonly APP_JAR=/mnt/disk1/jch/recommend-model/recommend-model-produce/target/recommend-model-produce-jar-with-dependencies.jar

# Directory-name suffixes of the training samples to include.
# NOTE(review): the meaning of these suffixes is not documented in this
# script — confirm with the job that produces the sample directories.
readonly -a SUFFIXES=(y 8 4 0 e a c k o w g s u q)

# Build one path per suffix, then join them with commas for trainPath.
train_paths=()
for suffix in "${SUFFIXES[@]}"; do
  train_paths+=("${BASE_TRAIN_DATA_PATH}/${year}_${suffix}")
done
# "${array[*]}" joins on the first character of IFS; the subshell keeps the
# IFS change from leaking into the rest of the script.
train_data_path=$(IFS=,; printf '%s' "${train_paths[*]}")

## ******* train *******
spark_args=(
  --class com.tzld.piaoquan.recommend.model.train_profile_gender_xgb_20251114
  --master yarn
  --driver-memory 6G
  --executor-memory 10G
  --executor-cores 1
  --num-executors "${workers}"
  # Spark configuration keys are case-sensitive; the previous spelling
  # "spark.yarn.executor.memoryoverhead" was not a recognised key, so the
  # intended 2048 MiB overhead was never applied.
  --conf spark.executor.memoryOverhead=2048
  --conf spark.shuffle.service.enabled=true
  --conf spark.shuffle.service.port=7337
  # NOTE(review): consolidateFiles, storage.memoryFraction and
  # shuffle.memoryFraction look like legacy (pre-unified-memory) settings;
  # verify whether Spark 3.3 still honours them before relying on them.
  --conf spark.shuffle.consolidateFiles=true
  --conf spark.shuffle.manager=sort
  --conf spark.storage.memoryFraction=0.4
  --conf spark.shuffle.memoryFraction=0.5
  --conf spark.default.parallelism=200
  --conf spark.sql.debug.maxToStringFields=100
  --files "${feature_file}"
)

# key:value arguments consumed by the training class itself.
job_args=(
  "trainPath:${train_data_path}"
  "featureFile:${feature_file}"
  "minCnt:${minCnt}"
  "modelPath:${MODEL_SAVE_PATH}"
  eta:0.06
  gamma:0.0
  max_depth:4
  num_round:1000
  "num_worker:${workers}"
)

# Last command: its exit status becomes the script's exit status.
"${SPARK_CLASS}" org.apache.spark.deploy.SparkSubmit \
  "${spark_args[@]}" \
  "${APP_JAR}" \
  "${job_args[@]}"