Просмотр исходного кода

Merge branch 'feature/jch' into feature_zhaohaipeng

zhaohaipeng 10 часов назад
Родитель
Commit
63bea2573a
14 измененных файлов с 2966 добавлено и 0 удалено
  1. 220 0
      recommend-model-produce/src/main/resources/20241209_recsys_nor_name.txt
  2. 380 0
      recommend-model-produce/src/main/resources/20241209_recsys_rov_name.txt
  3. 427 0
      recommend-model-produce/src/main/resources/20250221_recsys_nor_name.txt
  4. 445 0
      recommend-model-produce/src/main/resources/20250303_recsys_nor_name.txt
  5. 432 0
      recommend-model-produce/src/main/resources/20250627_recsys_nor_name.txt
  6. 48 0
      recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/MetricUtils.scala
  7. 128 0
      recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/pred_profile_gender_xgb_20251114.scala
  8. 150 0
      recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/pred_recsys_61_xgb_nor_hdfsfile_20241209.scala
  9. 127 0
      recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/pred_recsys_61_xgb_rov_hdfsfile_20241209.scala
  10. 94 0
      recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/rov_offline_ab_auc.scala
  11. 71 0
      recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/stat_qq.scala
  12. 121 0
      recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/train_profile_gender_xgb_20251114.scala
  13. 161 0
      recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/train_recsys_61_xgb_nor_20241209.scala
  14. 162 0
      recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/train_recsys_61_xgb_rov_20241209.scala

+ 220 - 0
recommend-model-produce/src/main/resources/20241209_recsys_nor_name.txt

@@ -0,0 +1,220 @@
+b111213_1d_ROS
+b111213_1d_ROV*log(return)
+b111213_2h_ROS
+b111213_2h_ROV*log(return)
+b111213_3d_ROS
+b111213_3d_ROV*log(return)
+b111213_7d_ROS
+b111213_7d_ROV*log(return)
+b123_1d_ROS
+b123_1d_ROV*log(return)
+b123_2h_ROS
+b123_2h_ROV*log(return)
+b123_3d_ROS
+b123_3d_ROV*log(return)
+b123_7d_ROS
+b123_7d_ROV*log(return)
+b167_1d_ROS
+b167_1d_ROV*log(return)
+b167_2h_ROS
+b167_2h_ROV*log(return)
+b167_3d_ROS
+b167_3d_ROV*log(return)
+b167_7d_ROS
+b167_7d_ROV*log(return)
+b171819_1d_ROS
+b171819_1d_ROV*log(return)
+b171819_2h_ROS
+b171819_2h_ROV*log(return)
+b171819_3d_ROS
+b171819_3d_ROV*log(return)
+b171819_7d_ROS
+b171819_7d_ROV*log(return)
+b20_24h_r_cnt
+b20_24h_r_cnt4s
+b20_24h_ros
+b20_24h_share_hasreturn
+b20_2h_r_cnt
+b20_2h_r_cnt4s
+b20_2h_ros
+b20_2h_share_hasreturn
+b20_7d_r_cnt
+b20_7d_r_cnt4s
+b20_7d_ros
+b20_7d_share_hasreturn
+b21_24h_r_cnt
+b21_24h_r_cnt4s
+b21_24h_ros
+b21_24h_share_hasreturn
+b21_2h_r_cnt
+b21_2h_r_cnt4s
+b21_2h_ros
+b21_2h_share_hasreturn
+b21_7d_r_cnt
+b21_7d_r_cnt4s
+b21_7d_ros
+b21_7d_share_hasreturn
+b22_24h_r_cnt
+b22_24h_r_cnt4s
+b22_24h_ros
+b22_24h_share_hasreturn
+b22_2h_r_cnt
+b22_2h_r_cnt4s
+b22_2h_ros
+b22_2h_share_hasreturn
+b22_7d_r_cnt
+b22_7d_r_cnt4s
+b22_7d_ros
+b22_7d_share_hasreturn
+b23_14d_r_cnt
+b23_14d_r_cnt4s
+b23_14d_ros
+b23_14d_share_hasreturn
+b23_30d_r_cnt
+b23_30d_r_cnt4s
+b23_30d_ros
+b23_30d_share_hasreturn
+b24_14d_r_cnt
+b24_14d_r_cnt4s
+b24_14d_ros
+b24_14d_share_hasreturn
+b24_30d_r_cnt
+b24_30d_r_cnt4s
+b24_30d_ros
+b24_30d_share_hasreturn
+b25_14d_r_cnt
+b25_14d_r_cnt4s
+b25_14d_ros
+b25_14d_share_hasreturn
+b25_30d_r_cnt
+b25_30d_r_cnt4s
+b25_30d_ros
+b25_30d_share_hasreturn
+b26_35d_r_cnt
+b26_35d_r_cnt4s
+b26_35d_ros
+b26_35d_share_hasreturn
+b26_365d_r_cnt
+b26_365d_r_cnt4s
+b26_365d_ros
+b26_365d_share_hasreturn
+b26_7d_r_cnt
+b26_7d_r_cnt4s
+b26_7d_ros
+b26_7d_share_hasreturn
+b26_90d_r_cnt
+b26_90d_r_cnt4s
+b26_90d_ros
+b26_90d_share_hasreturn
+b27_35d_r_cnt
+b27_35d_r_cnt4s
+b27_35d_ros
+b27_35d_share_hasreturn
+b27_365d_r_cnt
+b27_365d_r_cnt4s
+b27_365d_ros
+b27_365d_share_hasreturn
+b27_7d_r_cnt
+b27_7d_r_cnt4s
+b27_7d_ros
+b27_7d_share_hasreturn
+b27_90d_r_cnt
+b27_90d_r_cnt4s
+b27_90d_ros
+b27_90d_share_hasreturn
+b28_24h_r_cnt
+b28_24h_r_cnt4s
+b28_24h_ros
+b28_24h_share_hasreturn
+b28_2h_r_cnt
+b28_2h_r_cnt4s
+b28_2h_ros
+b28_2h_share_hasreturn
+b28_7d_r_cnt
+b28_7d_r_cnt4s
+b28_7d_ros
+b28_7d_share_hasreturn
+b8910_1d_ROS
+b8910_1d_ROV*log(return)
+b8910_2h_ROS
+b8910_2h_ROV*log(return)
+b8910_3d_ROS
+b8910_3d_ROV*log(return)
+b8910_7d_ROS
+b8910_7d_ROV*log(return)
+bit_rate
+c3_feature_tags_1d_avgscore
+c3_feature_tags_1d_matchnum
+c3_feature_tags_1d_maxscore
+c3_feature_tags_3d_avgscore
+c3_feature_tags_3d_matchnum
+c3_feature_tags_3d_maxscore
+c3_feature_tags_7d_avgscore
+c3_feature_tags_7d_matchnum
+c3_feature_tags_7d_maxscore
+c4_feature_tags_1d_avgscore
+c4_feature_tags_1d_matchnum
+c4_feature_tags_1d_maxscore
+c4_feature_tags_3d_avgscore
+c4_feature_tags_3d_matchnum
+c4_feature_tags_3d_maxscore
+c4_feature_tags_7d_avgscore
+c4_feature_tags_7d_matchnum
+c4_feature_tags_7d_maxscore
+c5_feature_tags_1d_avgscore
+c5_feature_tags_1d_matchnum
+c5_feature_tags_1d_maxscore
+c5_feature_tags_3d_avgscore
+c5_feature_tags_3d_matchnum
+c5_feature_tags_3d_maxscore
+c5_feature_tags_7d_avgscore
+c5_feature_tags_7d_matchnum
+c5_feature_tags_7d_maxscore
+c6_feature_tags_1d_avgscore
+c6_feature_tags_1d_matchnum
+c6_feature_tags_1d_maxscore
+c6_feature_tags_3d_avgscore
+c6_feature_tags_3d_matchnum
+c6_feature_tags_3d_maxscore
+c6_feature_tags_7d_avgscore
+c6_feature_tags_7d_matchnum
+c6_feature_tags_7d_maxscore
+c7_feature_tags_1d_avgscore
+c7_feature_tags_1d_matchnum
+c7_feature_tags_1d_maxscore
+c7_feature_tags_3d_avgscore
+c7_feature_tags_3d_matchnum
+c7_feature_tags_3d_maxscore
+c7_feature_tags_7d_avgscore
+c7_feature_tags_7d_matchnum
+c7_feature_tags_7d_maxscore
+c8_feature_return_num
+c8_feature_return_rank
+c8_feature_return_score
+c8_feature_share_num
+c8_feature_share_rank
+c8_feature_share_score
+c9_feature_return_num
+c9_feature_return_rank
+c9_feature_return_score
+c9_feature_share_num
+c9_feature_share_rank
+c9_feature_share_score
+return_uv_12h
+return_uv_1d
+return_uv_3d
+return_uv_7d
+share_pv_12h
+share_pv_1d
+share_pv_3d
+share_pv_7d
+total_time
+video_sim_cate1_list
+video_sim_cate2
+video_sim_cate2_list
+video_sim_keywords
+video_sim_style
+video_sim_theme
+video_sim_title
+video_sim_topic
+video_sim_user_value

+ 380 - 0
recommend-model-produce/src/main/resources/20241209_recsys_rov_name.txt

@@ -0,0 +1,380 @@
+b111213_12h_ROV
+b111213_12h_ROV*log(return)
+b111213_12h_STR
+b111213_12h_log(return)
+b111213_12h_log(share)
+b111213_1d_ROV
+b111213_1d_ROV*log(return)
+b111213_1d_STR
+b111213_1d_log(return)
+b111213_1d_log(share)
+b111213_1h_ROV
+b111213_1h_ROV*log(return)
+b111213_1h_STR
+b111213_1h_log(return)
+b111213_1h_log(share)
+b111213_2h_ROV
+b111213_2h_ROV*log(return)
+b111213_2h_STR
+b111213_2h_log(return)
+b111213_2h_log(share)
+b111213_3d_ROV
+b111213_3d_ROV*log(return)
+b111213_3d_STR
+b111213_3d_log(return)
+b111213_3d_log(share)
+b111213_3h_ROV
+b111213_3h_ROV*log(return)
+b111213_3h_STR
+b111213_3h_log(return)
+b111213_3h_log(share)
+b111213_4h_ROV
+b111213_4h_ROV*log(return)
+b111213_4h_STR
+b111213_4h_log(return)
+b111213_4h_log(share)
+b111213_7d_ROV
+b111213_7d_ROV*log(return)
+b111213_7d_STR
+b111213_7d_log(return)
+b111213_7d_log(share)
+b123_12h_ROV
+b123_12h_ROV*log(return)
+b123_12h_STR
+b123_12h_log(return)
+b123_12h_log(share)
+b123_1d_ROV
+b123_1d_ROV*log(return)
+b123_1d_STR
+b123_1d_log(return)
+b123_1d_log(share)
+b123_1h_ROV
+b123_1h_ROV*log(return)
+b123_1h_STR
+b123_1h_log(return)
+b123_1h_log(share)
+b123_2h_ROV
+b123_2h_ROV*log(return)
+b123_2h_STR
+b123_2h_log(return)
+b123_2h_log(share)
+b123_3d_ROV
+b123_3d_ROV*log(return)
+b123_3d_STR
+b123_3d_log(return)
+b123_3d_log(share)
+b123_3h_ROV
+b123_3h_ROV*log(return)
+b123_3h_STR
+b123_3h_log(return)
+b123_3h_log(share)
+b123_4h_ROV
+b123_4h_ROV*log(return)
+b123_4h_STR
+b123_4h_log(return)
+b123_4h_log(share)
+b123_7d_ROV
+b123_7d_ROV*log(return)
+b123_7d_STR
+b123_7d_log(return)
+b123_7d_log(share)
+b167_12h_ROV
+b167_12h_ROV*log(return)
+b167_12h_STR
+b167_12h_log(return)
+b167_12h_log(share)
+b167_1d_ROV
+b167_1d_ROV*log(return)
+b167_1d_STR
+b167_1d_log(return)
+b167_1d_log(share)
+b167_1h_ROV
+b167_1h_ROV*log(return)
+b167_1h_STR
+b167_1h_log(return)
+b167_1h_log(share)
+b167_2h_ROV
+b167_2h_ROV*log(return)
+b167_2h_STR
+b167_2h_log(return)
+b167_2h_log(share)
+b167_3d_ROV
+b167_3d_ROV*log(return)
+b167_3d_STR
+b167_3d_log(return)
+b167_3d_log(share)
+b167_3h_ROV
+b167_3h_ROV*log(return)
+b167_3h_STR
+b167_3h_log(return)
+b167_3h_log(share)
+b167_4h_ROV
+b167_4h_ROV*log(return)
+b167_4h_STR
+b167_4h_log(return)
+b167_4h_log(share)
+b167_7d_ROV
+b167_7d_ROV*log(return)
+b167_7d_STR
+b167_7d_log(return)
+b167_7d_log(share)
+b171819_12h_ROV
+b171819_12h_ROV*log(return)
+b171819_12h_STR
+b171819_12h_log(return)
+b171819_12h_log(share)
+b171819_1d_ROV
+b171819_1d_ROV*log(return)
+b171819_1d_STR
+b171819_1d_log(return)
+b171819_1d_log(share)
+b171819_1h_ROV
+b171819_1h_ROV*log(return)
+b171819_1h_STR
+b171819_1h_log(return)
+b171819_1h_log(share)
+b171819_2h_ROV
+b171819_2h_ROV*log(return)
+b171819_2h_STR
+b171819_2h_log(return)
+b171819_2h_log(share)
+b171819_3d_ROV
+b171819_3d_ROV*log(return)
+b171819_3d_STR
+b171819_3d_log(return)
+b171819_3d_log(share)
+b171819_3h_ROV
+b171819_3h_ROV*log(return)
+b171819_3h_STR
+b171819_3h_log(return)
+b171819_3h_log(share)
+b171819_4h_ROV
+b171819_4h_ROV*log(return)
+b171819_4h_STR
+b171819_4h_log(return)
+b171819_4h_log(share)
+b171819_7d_ROV
+b171819_7d_ROV*log(return)
+b171819_7d_STR
+b171819_7d_log(return)
+b171819_7d_log(share)
+b20_12h_return
+b20_12h_rov
+b20_12h_share
+b20_12h_str
+b20_1h_return
+b20_1h_rov
+b20_1h_share
+b20_1h_str
+b20_7d_return
+b20_7d_rov
+b20_7d_share
+b20_7d_str
+b21_12h_return
+b21_12h_rov
+b21_12h_share
+b21_12h_str
+b21_1h_return
+b21_1h_rov
+b21_1h_share
+b21_1h_str
+b21_7d_return
+b21_7d_rov
+b21_7d_share
+b21_7d_str
+b22_12h_return
+b22_12h_rov
+b22_12h_share
+b22_12h_str
+b22_1h_return
+b22_1h_rov
+b22_1h_share
+b22_1h_str
+b22_7d_return
+b22_7d_rov
+b22_7d_share
+b22_7d_str
+b23_30d_return
+b23_30d_rov
+b23_30d_share
+b23_30d_str
+b24_30d_return
+b24_30d_rov
+b24_30d_share
+b24_30d_str
+b25_30d_return
+b25_30d_rov
+b25_30d_share
+b25_30d_str
+b26_35d_return
+b26_35d_rov
+b26_35d_share
+b26_35d_str
+b26_365d_return
+b26_365d_rov
+b26_365d_share
+b26_365d_str
+b26_7d_return
+b26_7d_rov
+b26_7d_share
+b26_7d_str
+b27_35d_return
+b27_35d_rov
+b27_35d_share
+b27_35d_str
+b27_365d_return
+b27_365d_rov
+b27_365d_share
+b27_365d_str
+b27_7d_return
+b27_7d_rov
+b27_7d_share
+b27_7d_str
+b28_12h_return
+b28_12h_rov
+b28_12h_share
+b28_12h_str
+b28_1h_return
+b28_1h_rov
+b28_1h_share
+b28_1h_str
+b28_24h_return
+b28_24h_rov
+b28_24h_share
+b28_24h_str
+b28_2h_return
+b28_2h_rov
+b28_2h_share
+b28_2h_str
+b28_4h_return
+b28_4h_rov
+b28_4h_share
+b28_4h_str
+b28_6h_return
+b28_6h_rov
+b28_6h_share
+b28_6h_str
+b28_7d_return
+b28_7d_rov
+b28_7d_share
+b28_7d_str
+b8910_12h_ROV
+b8910_12h_ROV*log(return)
+b8910_12h_STR
+b8910_12h_log(return)
+b8910_12h_log(share)
+b8910_1d_ROV
+b8910_1d_ROV*log(return)
+b8910_1d_STR
+b8910_1d_log(return)
+b8910_1d_log(share)
+b8910_1h_ROV
+b8910_1h_ROV*log(return)
+b8910_1h_STR
+b8910_1h_log(return)
+b8910_1h_log(share)
+b8910_2h_ROV
+b8910_2h_ROV*log(return)
+b8910_2h_STR
+b8910_2h_log(return)
+b8910_2h_log(share)
+b8910_3d_ROV
+b8910_3d_ROV*log(return)
+b8910_3d_STR
+b8910_3d_log(return)
+b8910_3d_log(share)
+b8910_3h_ROV
+b8910_3h_ROV*log(return)
+b8910_3h_STR
+b8910_3h_log(return)
+b8910_3h_log(share)
+b8910_4h_ROV
+b8910_4h_ROV*log(return)
+b8910_4h_STR
+b8910_4h_log(return)
+b8910_4h_log(share)
+b8910_7d_ROV
+b8910_7d_ROV*log(return)
+b8910_7d_STR
+b8910_7d_log(return)
+b8910_7d_log(share)
+bit_rate
+c3_feature_tags_1d_avgscore
+c3_feature_tags_1d_matchnum
+c3_feature_tags_1d_maxscore
+c3_feature_tags_3d_avgscore
+c3_feature_tags_3d_matchnum
+c3_feature_tags_3d_maxscore
+c3_feature_tags_7d_avgscore
+c3_feature_tags_7d_matchnum
+c3_feature_tags_7d_maxscore
+c4_feature_tags_1d_avgscore
+c4_feature_tags_1d_matchnum
+c4_feature_tags_1d_maxscore
+c4_feature_tags_3d_avgscore
+c4_feature_tags_3d_matchnum
+c4_feature_tags_3d_maxscore
+c4_feature_tags_7d_avgscore
+c4_feature_tags_7d_matchnum
+c4_feature_tags_7d_maxscore
+c5_feature_tags_1d_avgscore
+c5_feature_tags_1d_matchnum
+c5_feature_tags_1d_maxscore
+c5_feature_tags_3d_avgscore
+c5_feature_tags_3d_matchnum
+c5_feature_tags_3d_maxscore
+c5_feature_tags_7d_avgscore
+c5_feature_tags_7d_matchnum
+c5_feature_tags_7d_maxscore
+c6_feature_tags_1d_avgscore
+c6_feature_tags_1d_matchnum
+c6_feature_tags_1d_maxscore
+c6_feature_tags_3d_avgscore
+c6_feature_tags_3d_matchnum
+c6_feature_tags_3d_maxscore
+c6_feature_tags_7d_avgscore
+c6_feature_tags_7d_matchnum
+c6_feature_tags_7d_maxscore
+c7_feature_tags_1d_avgscore
+c7_feature_tags_1d_matchnum
+c7_feature_tags_1d_maxscore
+c7_feature_tags_3d_avgscore
+c7_feature_tags_3d_matchnum
+c7_feature_tags_3d_maxscore
+c7_feature_tags_7d_avgscore
+c7_feature_tags_7d_matchnum
+c7_feature_tags_7d_maxscore
+c8_feature_return_num
+c8_feature_return_rank
+c8_feature_return_score
+c8_feature_share_num
+c8_feature_share_rank
+c8_feature_share_score
+c9_feature_return_num
+c9_feature_return_rank
+c9_feature_return_score
+c9_feature_share_num
+c9_feature_share_rank
+c9_feature_share_score
+playcnt_1d
+playcnt_3d
+playcnt_6h
+playcnt_7d
+return_uv_12h
+return_uv_1d
+return_uv_3d
+return_uv_7d
+share_pv_12h
+share_pv_1d
+share_pv_3d
+share_pv_7d
+total_time
+video_sim_cate1_list
+video_sim_cate2
+video_sim_cate2_list
+video_sim_keywords
+video_sim_style
+video_sim_theme
+video_sim_title
+video_sim_topic
+video_sim_user_value

+ 427 - 0
recommend-model-produce/src/main/resources/20250221_recsys_nor_name.txt

@@ -0,0 +1,427 @@
+b10_12h_is_share
+b10_12h_return_n_uv
+b10_12h_ros
+b10_12h_ros_minus
+b10_12h_rovn
+b10_12h_str
+b10_12h_str_plus
+b10_1h_is_share
+b10_1h_return_n_uv
+b10_1h_ros
+b10_1h_ros_minus
+b10_1h_rovn
+b10_1h_str
+b10_1h_str_plus
+b11_12h_is_share
+b11_12h_return_n_uv
+b11_12h_ros
+b11_12h_ros_minus
+b11_12h_rovn
+b11_12h_str
+b11_12h_str_plus
+b11_168h_is_share
+b11_168h_return_n_uv
+b11_168h_ros
+b11_168h_ros_minus
+b11_168h_rovn
+b11_168h_str
+b11_168h_str_plus
+b13_168h_is_share
+b13_168h_return_n_uv
+b13_168h_ros
+b13_168h_ros_minus
+b13_168h_ros_n
+b13_168h_ros_one
+b13_168h_rovn
+b13_168h_str
+b13_168h_str_plus
+b13_24h_is_share
+b13_24h_return_n_uv
+b13_24h_ros
+b13_24h_ros_minus
+b13_24h_ros_n
+b13_24h_ros_one
+b13_24h_rovn
+b13_24h_str
+b13_24h_str_plus
+b1_168h_exp
+b1_168h_is_return_1
+b1_168h_is_share
+b1_168h_return_n_uv
+b1_168h_ros
+b1_168h_ros_minus
+b1_168h_ros_n
+b1_168h_ros_one
+b1_168h_rovn
+b1_168h_share_cnt
+b1_168h_str
+b1_168h_str_plus
+b1_1h_exp
+b1_1h_is_return_1
+b1_1h_is_share
+b1_1h_return_n_uv
+b1_1h_ros
+b1_1h_ros_minus
+b1_1h_ros_n
+b1_1h_ros_one
+b1_1h_rovn
+b1_1h_share_cnt
+b1_1h_str
+b1_1h_str_plus
+b1_24h_exp
+b1_24h_is_return_1
+b1_24h_is_share
+b1_24h_return_n_uv
+b1_24h_ros
+b1_24h_ros_minus
+b1_24h_ros_n
+b1_24h_ros_one
+b1_24h_rovn
+b1_24h_share_cnt
+b1_24h_str
+b1_24h_str_plus
+b1_3h_exp
+b1_3h_is_return_1
+b1_3h_is_share
+b1_3h_return_n_uv
+b1_3h_ros
+b1_3h_ros_minus
+b1_3h_ros_n
+b1_3h_ros_one
+b1_3h_rovn
+b1_3h_share_cnt
+b1_3h_str
+b1_3h_str_plus
+b1_72h_exp
+b1_72h_is_return_1
+b1_72h_is_share
+b1_72h_return_n_uv
+b1_72h_ros
+b1_72h_ros_minus
+b1_72h_ros_n
+b1_72h_ros_one
+b1_72h_rovn
+b1_72h_share_cnt
+b1_72h_str
+b1_72h_str_plus
+b2_1h_is_return_1
+b2_1h_is_share
+b2_1h_return_n_uv
+b2_1h_ros
+b2_1h_ros_minus
+b2_1h_ros_n
+b2_1h_ros_one
+b2_1h_rovn
+b2_1h_share_cnt
+b2_1h_str
+b2_1h_str_plus
+b2_24h_is_return_1
+b2_24h_is_share
+b2_24h_return_n_uv
+b2_24h_ros
+b2_24h_ros_minus
+b2_24h_ros_n
+b2_24h_ros_one
+b2_24h_rovn
+b2_24h_share_cnt
+b2_24h_str
+b2_24h_str_plus
+b2_3h_is_return_1
+b2_3h_is_share
+b2_3h_return_n_uv
+b2_3h_ros
+b2_3h_ros_minus
+b2_3h_ros_n
+b2_3h_ros_one
+b2_3h_rovn
+b2_3h_share_cnt
+b2_3h_str
+b2_3h_str_plus
+b3_168h_is_return_1
+b3_168h_is_share
+b3_168h_return_n_uv
+b3_168h_ros
+b3_168h_ros_minus
+b3_168h_ros_n
+b3_168h_ros_one
+b3_168h_rovn
+b3_168h_share_cnt
+b3_168h_str
+b3_168h_str_plus
+b3_24h_is_return_1
+b3_24h_is_share
+b3_24h_return_n_uv
+b3_24h_ros
+b3_24h_ros_minus
+b3_24h_ros_n
+b3_24h_ros_one
+b3_24h_rovn
+b3_24h_share_cnt
+b3_24h_str
+b3_24h_str_plus
+b4_12h_is_return_1
+b4_12h_is_share
+b4_12h_return_n_uv
+b4_12h_ros
+b4_12h_ros_minus
+b4_12h_ros_n
+b4_12h_ros_one
+b4_12h_rovn
+b4_12h_share_cnt
+b4_12h_str
+b4_12h_str_plus
+b4_1h_is_return_1
+b4_1h_is_share
+b4_1h_return_n_uv
+b4_1h_ros
+b4_1h_ros_minus
+b4_1h_ros_n
+b4_1h_ros_one
+b4_1h_rovn
+b4_1h_share_cnt
+b4_1h_str
+b4_1h_str_plus
+b5_168h_is_share
+b5_168h_return_n_uv
+b5_168h_ros
+b5_168h_ros_minus
+b5_168h_ros_n
+b5_168h_ros_one
+b5_168h_rovn
+b5_168h_str
+b5_168h_str_plus
+b5_72h_is_share
+b5_72h_return_n_uv
+b5_72h_ros
+b5_72h_ros_minus
+b5_72h_ros_n
+b5_72h_ros_one
+b5_72h_rovn
+b5_72h_str
+b5_72h_str_plus
+b6_1h_is_share
+b6_1h_return_n_uv
+b6_1h_ros
+b6_1h_ros_minus
+b6_1h_ros_n
+b6_1h_ros_one
+b6_1h_rovn
+b6_1h_str
+b6_1h_str_plus
+b6_24h_is_share
+b6_24h_return_n_uv
+b6_24h_ros
+b6_24h_ros_minus
+b6_24h_ros_n
+b6_24h_ros_one
+b6_24h_rovn
+b6_24h_str
+b6_24h_str_plus
+b7_168h_is_share
+b7_168h_return_n_uv
+b7_168h_ros
+b7_168h_ros_minus
+b7_168h_rovn
+b7_168h_str
+b7_168h_str_plus
+b7_24h_is_share
+b7_24h_return_n_uv
+b7_24h_ros
+b7_24h_ros_minus
+b7_24h_rovn
+b7_24h_str
+b7_24h_str_plus
+b8_24h_is_share
+b8_24h_return_n_uv
+b8_24h_ros
+b8_24h_ros_minus
+b8_24h_rovn
+b8_24h_str
+b8_24h_str_plus
+b9_24h_is_share
+b9_24h_return_n_uv
+b9_24h_ros
+b9_24h_ros_minus
+b9_24h_rovn
+b9_24h_str
+b9_24h_str_plus
+c1_168h_is_return_1
+c1_168h_is_share
+c1_168h_return_n_uv
+c1_168h_ros
+c1_168h_ros_minus
+c1_168h_ros_n
+c1_168h_ros_one
+c1_168h_rovn
+c1_168h_share_cnt
+c1_168h_str
+c1_168h_str_plus
+c1_72h_is_return_1
+c1_72h_is_share
+c1_72h_return_n_uv
+c1_72h_ros
+c1_72h_ros_minus
+c1_72h_ros_n
+c1_72h_ros_one
+c1_72h_rovn
+c1_72h_share_cnt
+c1_72h_str
+c1_72h_str_plus
+c4_168h_avg_ros
+c4_168h_avg_ros_minus
+c4_168h_avg_ros_one
+c4_168h_avg_rovn
+c4_168h_avg_str
+c4_168h_avg_str_one
+c4_168h_avg_str_plus
+c4_168h_max_ros
+c4_168h_max_ros_minus
+c4_168h_max_ros_one
+c4_168h_max_rovn
+c4_168h_max_str
+c4_168h_max_str_one
+c4_168h_max_str_plus
+c4_168h_min_ros
+c4_168h_min_ros_minus
+c4_168h_min_ros_one
+c4_168h_min_rovn
+c4_168h_min_str
+c4_168h_min_str_one
+c4_168h_min_str_plus
+c4_72h_avg_ros
+c4_72h_avg_ros_minus
+c4_72h_avg_ros_one
+c4_72h_avg_rovn
+c4_72h_avg_str
+c4_72h_avg_str_one
+c4_72h_avg_str_plus
+c4_72h_max_ros
+c4_72h_max_ros_minus
+c4_72h_max_ros_one
+c4_72h_max_rovn
+c4_72h_max_str
+c4_72h_max_str_one
+c4_72h_max_str_plus
+c4_72h_min_ros
+c4_72h_min_ros_minus
+c4_72h_min_ros_one
+c4_72h_min_rovn
+c4_72h_min_str
+c4_72h_min_str_one
+c4_72h_min_str_plus
+c5_tags_1d_avgscore
+c5_tags_1d_matchnum
+c5_tags_1d_maxscore
+c5_tags_3d_avgscore
+c5_tags_3d_matchnum
+c5_tags_3d_maxscore
+c5_tags_7d_avgscore
+c5_tags_7d_matchnum
+c5_tags_7d_maxscore
+c6_tags_1d_avgscore
+c6_tags_1d_matchnum
+c6_tags_1d_maxscore
+c6_tags_3d_avgscore
+c6_tags_3d_matchnum
+c6_tags_3d_maxscore
+c6_tags_7d_avgscore
+c6_tags_7d_matchnum
+c6_tags_7d_maxscore
+c7_return_num
+c7_return_rank
+c7_return_score
+c7_share_num
+c7_share_rank
+c7_share_score
+c8_return_num
+c8_return_rank
+c8_return_score
+c8_share_num
+c8_share_rank
+c8_share_score
+c9_c1s@mu
+c9_c1s@ros
+c9_c1s@ros_minus
+c9_c1s@ros_one
+c9_c1s@rp
+c9_c1s@ru
+c9_c1s@sp
+c9_c2s@mu
+c9_c2s@ros
+c9_c2s@ros_minus
+c9_c2s@ros_one
+c9_c2s@rp
+c9_c2s@ru
+c9_c2s@sp
+c9_l1s@mu
+c9_l1s@ros
+c9_l1s@ros_minus
+c9_l1s@ros_one
+c9_l1s@rp
+c9_l1s@ru
+c9_l1s@sp
+c9_l2s@mu
+c9_l2s@ros
+c9_l2s@ros_minus
+c9_l2s@ros_one
+c9_l2s@rp
+c9_l2s@ru
+c9_l2s@sp
+c9_lrs@1@ts
+c9_lrs@1@uv
+c9_lrs@1_title
+c9_lrs@2@ts
+c9_lrs@2@uv
+c9_lrs@2_title
+c9_lss@1@cnt
+c9_lss@1@ts
+c9_lss@1_title
+c9_lss@2@cnt
+c9_lss@2@ts
+c9_lss@2_title
+c9_m_r_uv
+c9_m_s_cnt
+c9_mrs@1@ts
+c9_mrs@1@uv
+c9_mrs@1_title
+c9_mrs@2@ts
+c9_mrs@2@uv
+c9_mrs@2_title
+c9_mss@1@cnt
+c9_mss@1@ts
+c9_mss@1_title
+c9_mss@2@cnt
+c9_mss@2@ts
+c9_mss@2_title
+c9_r_pv
+c9_r_uv
+c9_ros
+c9_ros_minus
+c9_ros_one
+c9_s_cnt
+c9_s_pv
+d1_ros_cf_rank
+d1_ros_cf_score
+d1_rov_cf_rank
+d1_rov_cf_score
+d2_onlines
+d2_rank
+d2_score
+d3_exp
+d3_return_n
+d3_rovn
+h@bit_rate
+h@total_time
+h@ts
+h@tt@1
+hour
+hr_sim_cate1_list
+hr_sim_cate2
+hr_sim_cate2_list
+hr_sim_keywords
+hr_sim_title
+hr_sim_topic
+r@bit_rate
+r@total_time
+r@ts
+r@tt@1

+ 445 - 0
recommend-model-produce/src/main/resources/20250303_recsys_nor_name.txt

@@ -0,0 +1,445 @@
+b10_12h_is_share
+b10_12h_return_n_uv
+b10_12h_ros
+b10_12h_ros_minus
+b10_12h_rovn
+b10_12h_str
+b10_12h_str_plus
+b10_1h_is_share
+b10_1h_return_n_uv
+b10_1h_ros
+b10_1h_ros_minus
+b10_1h_rovn
+b10_1h_str
+b10_1h_str_plus
+b11_12h_is_share
+b11_12h_return_n_uv
+b11_12h_ros
+b11_12h_ros_minus
+b11_12h_rovn
+b11_12h_str
+b11_12h_str_plus
+b11_168h_is_share
+b11_168h_return_n_uv
+b11_168h_ros
+b11_168h_ros_minus
+b11_168h_rovn
+b11_168h_str
+b11_168h_str_plus
+b13_168h_is_share
+b13_168h_return_n_uv
+b13_168h_ros
+b13_168h_ros_minus
+b13_168h_ros_n
+b13_168h_ros_one
+b13_168h_rovn
+b13_168h_str
+b13_168h_str_plus
+b13_24h_is_share
+b13_24h_return_n_uv
+b13_24h_ros
+b13_24h_ros_minus
+b13_24h_ros_n
+b13_24h_ros_one
+b13_24h_rovn
+b13_24h_str
+b13_24h_str_plus
+b1_168h_exp
+b1_168h_is_return_1
+b1_168h_is_share
+b1_168h_return_n_uv
+b1_168h_ros
+b1_168h_ros_minus
+b1_168h_ros_n
+b1_168h_ros_one
+b1_168h_rovn
+b1_168h_share_cnt
+b1_168h_str
+b1_168h_str_plus
+b1_1h_exp
+b1_1h_is_return_1
+b1_1h_is_share
+b1_1h_return_n_uv
+b1_1h_ros
+b1_1h_ros_minus
+b1_1h_ros_n
+b1_1h_ros_one
+b1_1h_rovn
+b1_1h_share_cnt
+b1_1h_str
+b1_1h_str_plus
+b1_24h_exp
+b1_24h_is_return_1
+b1_24h_is_share
+b1_24h_return_n_uv
+b1_24h_ros
+b1_24h_ros_minus
+b1_24h_ros_n
+b1_24h_ros_one
+b1_24h_rovn
+b1_24h_share_cnt
+b1_24h_str
+b1_24h_str_plus
+b1_3h_exp
+b1_3h_is_return_1
+b1_3h_is_share
+b1_3h_return_n_uv
+b1_3h_ros
+b1_3h_ros_minus
+b1_3h_ros_n
+b1_3h_ros_one
+b1_3h_rovn
+b1_3h_share_cnt
+b1_3h_str
+b1_3h_str_plus
+b1_72h_exp
+b1_72h_is_return_1
+b1_72h_is_share
+b1_72h_return_n_uv
+b1_72h_ros
+b1_72h_ros_minus
+b1_72h_ros_n
+b1_72h_ros_one
+b1_72h_rovn
+b1_72h_share_cnt
+b1_72h_str
+b1_72h_str_plus
+b2_1h_is_return_1
+b2_1h_is_share
+b2_1h_return_n_uv
+b2_1h_ros
+b2_1h_ros_minus
+b2_1h_ros_n
+b2_1h_ros_one
+b2_1h_rovn
+b2_1h_share_cnt
+b2_1h_str
+b2_1h_str_plus
+b2_24h_is_return_1
+b2_24h_is_share
+b2_24h_return_n_uv
+b2_24h_ros
+b2_24h_ros_minus
+b2_24h_ros_n
+b2_24h_ros_one
+b2_24h_rovn
+b2_24h_share_cnt
+b2_24h_str
+b2_24h_str_plus
+b2_3h_is_return_1
+b2_3h_is_share
+b2_3h_return_n_uv
+b2_3h_ros
+b2_3h_ros_minus
+b2_3h_ros_n
+b2_3h_ros_one
+b2_3h_rovn
+b2_3h_share_cnt
+b2_3h_str
+b2_3h_str_plus
+b3_168h_is_return_1
+b3_168h_is_share
+b3_168h_return_n_uv
+b3_168h_ros
+b3_168h_ros_minus
+b3_168h_ros_n
+b3_168h_ros_one
+b3_168h_rovn
+b3_168h_share_cnt
+b3_168h_str
+b3_168h_str_plus
+b3_24h_is_return_1
+b3_24h_is_share
+b3_24h_return_n_uv
+b3_24h_ros
+b3_24h_ros_minus
+b3_24h_ros_n
+b3_24h_ros_one
+b3_24h_rovn
+b3_24h_share_cnt
+b3_24h_str
+b3_24h_str_plus
+b4_12h_is_return_1
+b4_12h_is_share
+b4_12h_return_n_uv
+b4_12h_ros
+b4_12h_ros_minus
+b4_12h_ros_n
+b4_12h_ros_one
+b4_12h_rovn
+b4_12h_share_cnt
+b4_12h_str
+b4_12h_str_plus
+b4_1h_is_return_1
+b4_1h_is_share
+b4_1h_return_n_uv
+b4_1h_ros
+b4_1h_ros_minus
+b4_1h_ros_n
+b4_1h_ros_one
+b4_1h_rovn
+b4_1h_share_cnt
+b4_1h_str
+b4_1h_str_plus
+b5_168h_is_share
+b5_168h_return_n_uv
+b5_168h_ros
+b5_168h_ros_minus
+b5_168h_ros_n
+b5_168h_ros_one
+b5_168h_rovn
+b5_168h_str
+b5_168h_str_plus
+b5_72h_is_share
+b5_72h_return_n_uv
+b5_72h_ros
+b5_72h_ros_minus
+b5_72h_ros_n
+b5_72h_ros_one
+b5_72h_rovn
+b5_72h_str
+b5_72h_str_plus
+b6_1h_is_share
+b6_1h_return_n_uv
+b6_1h_ros
+b6_1h_ros_minus
+b6_1h_ros_n
+b6_1h_ros_one
+b6_1h_rovn
+b6_1h_str
+b6_1h_str_plus
+b6_24h_is_share
+b6_24h_return_n_uv
+b6_24h_ros
+b6_24h_ros_minus
+b6_24h_ros_n
+b6_24h_ros_one
+b6_24h_rovn
+b6_24h_str
+b6_24h_str_plus
+b7_168h_is_share
+b7_168h_return_n_uv
+b7_168h_ros
+b7_168h_ros_minus
+b7_168h_rovn
+b7_168h_str
+b7_168h_str_plus
+b7_24h_is_share
+b7_24h_return_n_uv
+b7_24h_ros
+b7_24h_ros_minus
+b7_24h_rovn
+b7_24h_str
+b7_24h_str_plus
+b8_24h_is_share
+b8_24h_return_n_uv
+b8_24h_ros
+b8_24h_ros_minus
+b8_24h_rovn
+b8_24h_str
+b8_24h_str_plus
+b9_24h_is_share
+b9_24h_return_n_uv
+b9_24h_ros
+b9_24h_ros_minus
+b9_24h_rovn
+b9_24h_str
+b9_24h_str_plus
+c1_168h_is_return_1
+c1_168h_is_share
+c1_168h_return_n_uv
+c1_168h_ros
+c1_168h_ros_minus
+c1_168h_ros_n
+c1_168h_ros_one
+c1_168h_rovn
+c1_168h_share_cnt
+c1_168h_str
+c1_168h_str_plus
+c1_72h_is_return_1
+c1_72h_is_share
+c1_72h_return_n_uv
+c1_72h_ros
+c1_72h_ros_minus
+c1_72h_ros_n
+c1_72h_ros_one
+c1_72h_rovn
+c1_72h_share_cnt
+c1_72h_str
+c1_72h_str_plus
+c5_tags_1d_avgscore
+c5_tags_1d_matchnum
+c5_tags_1d_maxscore
+c5_tags_3d_avgscore
+c5_tags_3d_matchnum
+c5_tags_3d_maxscore
+c5_tags_7d_avgscore
+c5_tags_7d_matchnum
+c5_tags_7d_maxscore
+c6_tags_1d_avgscore
+c6_tags_1d_matchnum
+c6_tags_1d_maxscore
+c6_tags_3d_avgscore
+c6_tags_3d_matchnum
+c6_tags_3d_maxscore
+c6_tags_7d_avgscore
+c6_tags_7d_matchnum
+c6_tags_7d_maxscore
+c7_return_num
+c7_return_rank
+c7_return_score
+c7_share_num
+c7_share_rank
+c7_share_score
+c8_return_num
+c8_return_rank
+c8_return_score
+c8_share_num
+c8_share_rank
+c8_share_score
+c9_c1s@mu
+c9_c1s@ros
+c9_c1s@ros_minus
+c9_c1s@ros_one
+c9_c1s@rp
+c9_c1s@ru
+c9_c1s@sp
+c9_c2s@mu
+c9_c2s@ros
+c9_c2s@ros_minus
+c9_c2s@ros_one
+c9_c2s@rp
+c9_c2s@ru
+c9_c2s@sp
+c9_l1s@mu
+c9_l1s@ros
+c9_l1s@ros_minus
+c9_l1s@ros_one
+c9_l1s@rp
+c9_l1s@ru
+c9_l1s@sp
+c9_l2s@mu
+c9_l2s@ros
+c9_l2s@ros_minus
+c9_l2s@ros_one
+c9_l2s@rp
+c9_l2s@ru
+c9_l2s@sp
+c9_lrs@1@ts
+c9_lrs@1@uv
+c9_lrs@1_title
+c9_lrs@2@ts
+c9_lrs@2@uv
+c9_lrs@2_title
+c9_lss@1@cnt
+c9_lss@1@ts
+c9_lss@1_title
+c9_lss@2@cnt
+c9_lss@2@ts
+c9_lss@2_title
+c9_m_r_uv
+c9_m_s_cnt
+c9_mrs@1@ts
+c9_mrs@1@uv
+c9_mrs@1_title
+c9_mrs@2@ts
+c9_mrs@2@uv
+c9_mrs@2_title
+c9_mss@1@cnt
+c9_mss@1@ts
+c9_mss@1_title
+c9_mss@2@cnt
+c9_mss@2@ts
+c9_mss@2_title
+c9_r_pv
+c9_r_uv
+c9_ros
+c9_ros_minus
+c9_ros_one
+c9_s_cnt
+c9_s_pv
+d1_ros_cf_rank
+d1_ros_cf_score
+d1_rov_cf_rank
+d1_rov_cf_score
+d2_onlines
+d2_rank
+d2_score
+d3_exp
+d3_return_n
+d3_rovn
+h@bit_rate
+h@total_time
+h@ts
+h@tt@1
+hour
+hr_sim_cate1_list
+hr_sim_cate2
+hr_sim_cate2_list
+hr_sim_keywords
+hr_sim_title
+hr_sim_topic
+r@bit_rate
+r@total_time
+r@ts
+r@tt@1
+h@cate1_list@1
+h@cate1_list@10
+h@cate1_list@11
+h@cate1_list@13
+h@cate1_list@14
+h@cate1_list@15
+h@cate1_list@2
+h@cate1_list@3
+h@cate1_list@4
+h@cate1_list@5
+h@cate1_list@6
+h@cate1_list@7
+h@cate1_list@9
+h@cate2@1
+h@cate2@12
+h@cate2@13
+h@cate2@15
+h@cate2@17
+h@cate2@18
+h@cate2@19
+h@cate2@2
+h@cate2@21
+h@cate2@22
+h@cate2@23
+h@cate2@3
+h@cate2@5
+h@cate2@7
+h@cate2@8
+h@festive_label1@1
+h@festive_label1@2
+h@festive_label1@3
+h@festive_label1@4
+r@cate1_list@1
+r@cate1_list@10
+r@cate1_list@13
+r@cate1_list@14
+r@cate1_list@15
+r@cate1_list@2
+r@cate1_list@3
+r@cate1_list@5
+r@cate1_list@6
+r@cate1_list@7
+r@cate1_list@9
+r@cate2@1
+r@cate2@10
+r@cate2@12
+r@cate2@13
+r@cate2@14
+r@cate2@15
+r@cate2@17
+r@cate2@19
+r@cate2@2
+r@cate2@22
+r@cate2@23
+r@cate2@26
+r@cate2@3
+r@cate2@7
+r@cate2@9
+r@festive_label1@1
+r@festive_label1@3

+ 432 - 0
recommend-model-produce/src/main/resources/20250627_recsys_nor_name.txt

@@ -0,0 +1,432 @@
+b0_12h@return_1_uv
+b0_12h@ros1_#
+b0_12h@ros_#
+b0_12h@ros_minus1_#
+b0_12h@ros_minus_#
+b0_12h@ros_n1_#
+b0_12h@ros_n_#
+b0_12h@ros_one
+b0_12h@rovn1_#
+b0_12h@rovn_#
+b0_1h@return_1_uv
+b0_1h@ros1_#
+b0_1h@ros_#
+b0_1h@ros_minus1_#
+b0_1h@ros_minus_#
+b0_1h@ros_n1_#
+b0_1h@ros_n_#
+b0_1h@ros_one
+b0_1h@rovn1_#
+b0_1h@rovn_#
+b0_3h@return_1_uv
+b0_3h@ros1_#
+b0_3h@ros_#
+b0_3h@ros_minus1_#
+b0_3h@ros_minus_#
+b0_3h@ros_n1_#
+b0_3h@ros_n_#
+b0_3h@ros_one
+b0_3h@rovn1_#
+b0_3h@rovn_#
+b0_6h@return_1_uv
+b0_6h@ros1_#
+b0_6h@ros_#
+b0_6h@ros_minus1_#
+b0_6h@ros_minus_#
+b0_6h@ros_n1_#
+b0_6h@ros_n_#
+b0_6h@ros_one
+b0_6h@rovn1_#
+b0_6h@rovn_#
+b10_12h@is_share
+b10_12h@return_n_uv
+b10_12h@ros_#
+b10_12h@ros_minus_#
+b10_12h@rovn_#
+b10_12h@str
+b10_12h@str_plus
+b10_1h@is_share
+b10_1h@return_n_uv
+b10_1h@ros_#
+b10_1h@ros_minus_#
+b10_1h@rovn_#
+b10_1h@str
+b10_1h@str_plus
+b11_12h@is_share
+b11_12h@return_n_uv
+b11_12h@ros_#
+b11_12h@ros_minus_#
+b11_12h@rovn_#
+b11_12h@str
+b11_12h@str_plus
+b13_1h@exp
+b13_1h@is_share
+b13_1h@ros_#
+b13_1h@ros_minus_#
+b13_1h@ros_n_#
+b13_1h@ros_one
+b13_1h@rovn_#
+b13_1h@share_cnt
+b13_1h@str
+b13_1h@str_plus
+b13_24h@exp
+b13_24h@is_share
+b13_24h@ros_#
+b13_24h@ros_minus_#
+b13_24h@ros_n_#
+b13_24h@ros_one
+b13_24h@rovn_#
+b13_24h@share_cnt
+b13_24h@str
+b13_24h@str_plus
+b13_3h@exp
+b13_3h@is_share
+b13_3h@ros_#
+b13_3h@ros_minus_#
+b13_3h@ros_n_#
+b13_3h@ros_one
+b13_3h@rovn_#
+b13_3h@share_cnt
+b13_3h@str
+b13_3h@str_plus
+b13_72h@exp
+b13_72h@is_share
+b13_72h@ros_#
+b13_72h@ros_minus_#
+b13_72h@ros_n_#
+b13_72h@ros_one
+b13_72h@rovn_#
+b13_72h@share_cnt
+b13_72h@str
+b13_72h@str_plus
+b1_1h@ros_#
+b1_1h@ros_minus_#
+b1_1h@ros_n_#
+b1_1h@ros_one
+b1_1h@rovn_#
+b1_24h@ros_#
+b1_24h@ros_minus_#
+b1_24h@ros_n_#
+b1_24h@ros_one
+b1_24h@rovn_#
+b1_3h@ros_#
+b1_3h@ros_minus_#
+b1_3h@ros_n_#
+b1_3h@ros_one
+b1_3h@rovn_#
+b1_72h@ros_#
+b1_72h@ros_minus_#
+b1_72h@ros_n_#
+b1_72h@ros_one
+b1_72h@rovn_#
+b2_24h@return_n_uv
+b2_24h@ros_#
+b2_24h@ros_minus_#
+b2_24h@ros_n_#
+b2_24h@ros_one
+b2_24h@rovn_#
+b2_3h@return_n_uv
+b2_3h@ros_#
+b2_3h@ros_minus_#
+b2_3h@ros_n_#
+b2_3h@ros_one
+b2_3h@rovn_#
+b3_24h@is_share
+b3_24h@return_n_uv
+b3_24h@ros_#
+b3_24h@ros_minus_#
+b3_24h@ros_n_#
+b3_24h@ros_one
+b3_24h@rovn_#
+b3_24h@str
+b3_24h@str_plus
+b3_72h@is_share
+b3_72h@return_n_uv
+b3_72h@ros_#
+b3_72h@ros_minus_#
+b3_72h@ros_n_#
+b3_72h@ros_one
+b3_72h@rovn_#
+b3_72h@str
+b3_72h@str_plus
+b4_12h@is_share
+b4_12h@return_n_uv
+b4_12h@ros_#
+b4_12h@ros_minus_#
+b4_12h@ros_n_#
+b4_12h@ros_one
+b4_12h@rovn_#
+b4_3h@is_share
+b4_3h@return_n_uv
+b4_3h@ros_#
+b4_3h@ros_minus_#
+b4_3h@ros_n_#
+b4_3h@ros_one
+b4_3h@rovn_#
+b5_12h@exp
+b5_12h@is_share
+b5_12h@return_n_uv
+b5_12h@ros_#
+b5_12h@ros_minus_#
+b5_12h@ros_n_#
+b5_12h@ros_one
+b5_12h@rovn_#
+b5_12h@share_cnt
+b5_12h@str
+b5_12h@str_plus
+b5_1h@exp
+b5_1h@is_share
+b5_1h@return_n_uv
+b5_1h@ros_#
+b5_1h@ros_minus_#
+b5_1h@ros_n_#
+b5_1h@ros_one
+b5_1h@rovn_#
+b5_1h@share_cnt
+b5_1h@str
+b5_1h@str_plus
+b5_24h@exp
+b5_24h@is_share
+b5_24h@return_n_uv
+b5_24h@ros_#
+b5_24h@ros_minus_#
+b5_24h@ros_n_#
+b5_24h@ros_one
+b5_24h@rovn_#
+b5_24h@share_cnt
+b5_24h@str
+b5_24h@str_plus
+b5_3h@exp
+b5_3h@is_share
+b5_3h@return_n_uv
+b5_3h@ros_#
+b5_3h@ros_minus_#
+b5_3h@ros_n_#
+b5_3h@ros_one
+b5_3h@rovn_#
+b5_3h@share_cnt
+b5_3h@str
+b5_3h@str_plus
+b5_6h@exp
+b5_6h@is_share
+b5_6h@return_n_uv
+b5_6h@ros_#
+b5_6h@ros_minus_#
+b5_6h@ros_n_#
+b5_6h@ros_one
+b5_6h@rovn_#
+b5_6h@share_cnt
+b5_6h@str
+b5_6h@str_plus
+b5_72h@exp
+b5_72h@is_share
+b5_72h@return_n_uv
+b5_72h@ros_#
+b5_72h@ros_minus_#
+b5_72h@ros_n_#
+b5_72h@ros_one
+b5_72h@rovn_#
+b5_72h@share_cnt
+b5_72h@str
+b5_72h@str_plus
+b6_1h@is_share
+b6_1h@return_n_uv
+b6_1h@ros_#
+b6_1h@ros_minus_#
+b6_1h@ros_n_#
+b6_1h@ros_one
+b6_1h@rovn_#
+b6_1h@str
+b6_1h@str_plus
+b6_24h@is_share
+b6_24h@return_n_uv
+b6_24h@ros_#
+b6_24h@ros_minus_#
+b6_24h@ros_n_#
+b6_24h@ros_one
+b6_24h@rovn_#
+b6_24h@str
+b6_24h@str_plus
+b7_1h@is_share
+b7_1h@return_n_uv
+b7_1h@ros_#
+b7_1h@ros_minus_#
+b7_1h@ros_n_#
+b7_1h@ros_one
+b7_1h@rovn_#
+b7_1h@str
+b7_1h@str_plus
+b7_24h@is_share
+b7_24h@return_n_uv
+b7_24h@ros_#
+b7_24h@ros_minus_#
+b7_24h@ros_n_#
+b7_24h@ros_one
+b7_24h@rovn_#
+b7_24h@str
+b7_24h@str_plus
+b7_3h@is_share
+b7_3h@return_n_uv
+b7_3h@ros_#
+b7_3h@ros_minus_#
+b7_3h@ros_n_#
+b7_3h@ros_one
+b7_3h@rovn_#
+b7_3h@str
+b7_3h@str_plus
+b7_72h@is_share
+b7_72h@return_n_uv
+b7_72h@ros_#
+b7_72h@ros_minus_#
+b7_72h@ros_n_#
+b7_72h@ros_one
+b7_72h@rovn_#
+b7_72h@str
+b7_72h@str_plus
+b8_1h@is_share
+b8_1h@return_n_uv
+b8_1h@ros_#
+b8_1h@ros_minus_#
+b8_1h@rovn_#
+b8_1h@str
+b8_1h@str_plus
+b8_24h@is_share
+b8_24h@return_n_uv
+b8_24h@ros_#
+b8_24h@ros_minus_#
+b8_24h@rovn_#
+b8_24h@str
+b8_24h@str_plus
+b8_3h@is_share
+b8_3h@return_n_uv
+b8_3h@ros_#
+b8_3h@ros_minus_#
+b8_3h@rovn_#
+b8_3h@str
+b8_3h@str_plus
+b9_1h@is_share
+b9_1h@return_n_uv
+b9_1h@ros_#
+b9_1h@ros_minus_#
+b9_1h@rovn_#
+b9_1h@str
+b9_1h@str_plus
+b9_24h@is_share
+b9_24h@return_n_uv
+b9_24h@ros_#
+b9_24h@ros_minus_#
+b9_24h@rovn_#
+b9_24h@str
+b9_24h@str_plus
+b9_3h@is_share
+b9_3h@return_n_uv
+b9_3h@ros_#
+b9_3h@ros_minus_#
+b9_3h@rovn_#
+b9_3h@str
+b9_3h@str_plus
+c1_168h@is_share
+c1_168h@return_n_uv
+c1_168h@ros_#
+c1_168h@ros_minus_#
+c1_168h@ros_n_#
+c1_168h@ros_one
+c1_168h@rovn_#
+c1_168h@str
+c1_168h@str_plus
+c1_72h@is_share
+c1_72h@return_n_uv
+c1_72h@ros_#
+c1_72h@ros_minus_#
+c1_72h@ros_n_#
+c1_72h@ros_one
+c1_72h@rovn_#
+c1_72h@str
+c1_72h@str_plus
+c5_tags_1d@avgscore
+c5_tags_1d@maxscore
+c5_tags_3d@avgscore
+c5_tags_3d@maxscore
+c5_tags_7d@avgscore
+c5_tags_7d@maxscore
+c6_tags_1d@avgscore
+c6_tags_1d@maxscore
+c6_tags_3d@avgscore
+c6_tags_3d@maxscore
+c6_tags_7d@avgscore
+c6_tags_7d@maxscore
+c9@m_r_uv
+c9@m_s_cnt
+c9@r_pv
+c9@r_uv
+c9@ros
+c9@ros_minus
+c9@ros_one
+c9@s_cnt
+c9@s_pv
+c9_c1s@mu
+c9_c1s@ros
+c9_c1s@ros_minus
+c9_c1s@ros_one
+c9_c1s@rp
+c9_c1s@ru
+c9_c1s@sp
+c9_c2s@mu
+c9_c2s@ros
+c9_c2s@ros_minus
+c9_c2s@ros_one
+c9_c2s@rp
+c9_c2s@ru
+c9_c2s@sp
+c9_l1s@mu
+c9_l1s@ros
+c9_l1s@ros_minus
+c9_l1s@sp
+c9_l2s@mu
+c9_l2s@ros
+c9_l2s@ros_minus
+c9_l2s@sp
+c9_lrs@1@title
+c9_lrs@1@ts
+c9_lrs@1@uv
+c9_lrs@2@title
+c9_lrs@2@ts
+c9_lrs@2@uv
+c9_lss@1@cnt
+c9_lss@1@title
+c9_lss@1@ts
+c9_lss@2@cnt
+c9_lss@2@title
+c9_lss@2@ts
+c9_mrs@1@title
+c9_mrs@1@ts
+c9_mrs@1@uv
+c9_mrs@2@title
+c9_mrs@2@ts
+c9_mrs@2@uv
+c9_mss@1@cnt
+c9_mss@1@title
+c9_mss@1@ts
+c9_mss@2@cnt
+c9_mss@2@title
+c9_mss@2@ts
+d1@ros_cf_rank
+d1@ros_cf_score
+d1@rov_cf_rank
+d1@rov_cf_score
+d2@rank
+d2@score
+d3@exp
+d3@return_n
+d3@rovn
+h@bit_rate
+h@total_time
+h@ts
+hour
+hr_sim@cate2
+hr_sim@cate2_list
+hr_sim@keywords
+hr_sim@title
+r@bit_rate
+r@total_time
+r@ts

+ 48 - 0
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/MetricUtils.scala

@@ -0,0 +1,48 @@
+package com.tzld.piaoquan.recommend.model
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.Row
+
+/**
+ * Metric helpers shared by the XGBoost training / prediction jobs.
+ * Rows handed to calMAPE / calRMSLE must carry (label, prediction)
+ * in columns 0 and 1.
+ */
+object MetricUtils {
+  /** Log-scale a label: logType == 0 leaves it unchanged, otherwise
+   *  returns log_base(1 + label). */
+  def logScale(label: Double, logType: Int, logBase: Double): Double = {
+    if (0 == logType) {
+      label
+    } else {
+      Math.log(1 + label) / Math.log(logBase)
+    }
+  }
+
+  /** Inverse of [[logScale]]: restores a prediction made in log space. */
+  def restoreLog(predict: Double, logType: Int, logBase: Double): Double = {
+    if (0 == logType) {
+      predict
+    } else {
+      Math.exp(predict * Math.log(logBase)) - 1
+    }
+  }
+
+  /** Soft-clip a label above maxVal: values past the cap grow only
+   *  logarithmically, damping extreme outliers. */
+  def clipLabel(label: Double, maxVal: Double = 30): Double = {
+    if (label < maxVal) {
+      label
+    } else {
+      maxVal + 2 * Math.log(label - maxVal + 1)
+    }
+  }
+
+  /**
+   * Mean Absolute Percentage Error over (label, prediction) rows.
+   * Computed in a single pass — the original sum() + count() pair
+   * evaluated the mapped RDD twice.
+   * NOTE: a label of 0 yields Infinity; callers should filter zero
+   * labels beforehand. An empty RDD yields NaN (0.0 / 0), as before.
+   */
+  def calMAPE(evalRdd: RDD[Row]): Double = {
+    val (sum, cnt) = evalRdd.map(raw => {
+      val label = raw.get(0).toString.toDouble
+      val pred = raw.get(1).toString.toDouble
+      (math.abs(label - pred) / label, 1L)
+    }).fold((0.0, 0L))((a, b) => (a._1 + b._1, a._2 + b._2))
+    sum / cnt
+  }
+
+  /**
+   * Root Mean Squared Logarithmic Error over (label, prediction) rows,
+   * computed in a single pass over the RDD.
+   */
+  def calRMSLE(evalRdd: RDD[Row]): Double = {
+    val (sum, cnt) = evalRdd.map(raw => {
+      val label = raw.get(0).toString.toDouble
+      val pred = raw.get(1).toString.toDouble
+      (math.pow(math.log(pred + 1) - math.log(label + 1), 2), 1L)
+    }).fold((0.0, 0L))((a, b) => (a._1 + b._1, a._2 + b._2))
+    math.sqrt(sum / cnt)
+  }
+}

+ 128 - 0
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/pred_profile_gender_xgb_20251114.scala

@@ -0,0 +1,128 @@
+package com.tzld.piaoquan.recommend.model
+
+import ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel
+import org.apache.commons.lang.math.NumberUtils
+import org.apache.commons.lang3.StringUtils
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
+import org.apache.spark.ml.feature.VectorAssembler
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.types.DataTypes
+import org.apache.spark.sql.{Row, SparkSession}
+
+import java.util
+import scala.io.Source
+
+
+/**
+ * Offline scoring job for the user-profile gender XGBoost classifier.
+ * Loads a trained model, scores every qualifying mid from testPath,
+ * writes (label, rawPrediction, probability, mid, cnt) rows to HDFS,
+ * and prints the AUC of the predictions.
+ */
+object pred_profile_gender_xgb_20251114 {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    val param = ParamUtils.parseArgs(args)
+    val modelPath = param.getOrElse("modelPath", "/dw/recommend/model/user_profile/gender/model/model_xgb")
+    val testPath = param.getOrElse("testPath", "")
+    // NOTE(review): the default feature file is the recsys rov list — confirm
+    // this is the intended feature set for the gender model.
+    val featureFile = param.getOrElse("featureFile", "20241209_recsys_rov_name.txt")
+    val minCnt = param.getOrElse("minCnt", "10").toDouble
+    val minFeatCnt = param.getOrElse("minFeatCnt", "1").toDouble
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/user_profile/gender/result")
+    val repartition = param.getOrElse("repartition", "20").toInt
+
+    // Schema: label, one double column per feature, plus pass-through
+    // mid / cnt columns kept for joining and debugging downstream.
+    val features = loadFeatureNames(featureFile)
+    var fields = Array(
+      DataTypes.createStructField("label", DataTypes.IntegerType, true)
+    ) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))
+    fields = fields ++ Array(
+      DataTypes.createStructField("mid", DataTypes.StringType, true),
+      DataTypes.createStructField("cnt", DataTypes.IntegerType, true)
+    )
+    val schema = DataTypes.createStructType(fields)
+    val vectorAssembler = new VectorAssembler().setInputCols(features).setOutputCol("features")
+
+    val model = XGBoostClassificationModel.load(modelPath)
+    model.setMissing(0.0f).setFeaturesCol("features")
+    val testData = createData(
+      minCnt,
+      minFeatCnt,
+      sc.textFile(testPath),
+      features
+    )
+
+    val testDataSet = spark.createDataFrame(testData, schema)
+    val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label", "mid", "cnt")
+    val predictions = model.transform(testDataSetTrans)
+    val saveData = predictions.select("label", "rawPrediction", "probability", "mid", "cnt").rdd
+      .map(r => {
+        (r.get(0), r.get(1), r.get(2), r.get(3), r.get(4)).productIterator.mkString("\t")
+      })
+    val hdfsPath = savePath
+    // Guard against wiping an arbitrary path: only delete/write under /dw/recommend/model/.
+    if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+      println("删除路径并开始数据写入:" + hdfsPath)
+      MyHdfsUtils.delete_hdfs_path(hdfsPath)
+      saveData.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+    } else {
+      println("路径不合法,无法写入:" + hdfsPath)
+    }
+
+    val evaluator = new BinaryClassificationEvaluator()
+      .setLabelCol("label")
+      .setRawPredictionCol("probability")
+      .setMetricName("areaUnderROC")
+    val auc = evaluator.evaluate(predictions.select("label", "probability"))
+    // Fixed log tag: this job evaluates profile gender, not recsys rov.
+    println("profile gender:auc:" + auc)
+
+    println("---------------------------------\n")
+    println("---------------------------------\n")
+  }
+
+  /**
+   * Parses raw "mid \t label \t k:v \t k:v ..." lines into Rows matching the
+   * schema built in main. Keeps only mids whose "cnt" feature is at least
+   * minCnt and that carry at least minFeatCnt of the model's feature names.
+   */
+  def createData(minCnt: Double, minFeatCnt: Double, data: RDD[String], features: Array[String]): RDD[Row] = {
+    val featureSet = features.toSet
+    data
+      .map(row => {
+        val cells: Array[String] = StringUtils.split(row, '\t')
+        val mid = cells(0)
+        val label = NumberUtils.toInt(cells(1))
+        val featureMap: util.Map[String, Double] = new util.HashMap[String, Double]
+        var featCnt = 0
+        for (i <- 2 until cells.length) {
+          val fv: Array[String] = StringUtils.split(cells(i), ':')
+          featureMap.put(fv(0), NumberUtils.toDouble(fv(1), 0.0))
+          if (featureSet.contains(fv(0))) {
+            featCnt += 1
+          }
+        }
+        (mid, label, featureMap, featCnt)
+      })
+      .filter {
+        case (mid, label, featureMap, featCnt) =>
+          val cnt = featureMap.getOrDefault("cnt", 0.0d)
+          cnt >= minCnt && featCnt >= minFeatCnt
+      }
+      .map {
+        case (mid, label, featureMap, featCnt) =>
+          val v: Array[Any] = new Array[Any](features.length + 3)
+          v(0) = label
+          for (i <- features.indices) {
+            v(i + 1) = featureMap.getOrDefault(features(i), 0.0d)
+          }
+          v(features.length + 1) = mid
+          v(features.length + 2) = featureMap.getOrDefault("cnt", 0.0d).toInt
+          Row(v: _*)
+      }
+  }
+
+  /** Reads one feature name per line from a local file, stripping blanks. */
+  def loadFeatureNames(nameFile: String): Array[String] = {
+    val buffer = Source.fromFile(nameFile)
+    // Close the source even if reading throws (was only closed on success).
+    val names = try buffer.getLines().mkString("\n") finally buffer.close()
+    val featArray = names.split("\n")
+      .map(r => r.replace(" ", "").replaceAll("\n", ""))
+      .filter(r => r.nonEmpty)
+    println("featArray.size=" + featArray.length)
+    println(featArray.mkString(","))
+    featArray
+  }
+}

+ 150 - 0
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/pred_recsys_61_xgb_nor_hdfsfile_20241209.scala

@@ -0,0 +1,150 @@
+package com.tzld.piaoquan.recommend.model
+
+import ml.dmlc.xgboost4j.scala.spark.XGBoostRegressionModel
+import org.apache.commons.lang.math.NumberUtils
+import org.apache.commons.lang3.StringUtils
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.ml.evaluation.RegressionEvaluator
+import org.apache.spark.ml.feature.VectorAssembler
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.types.DataTypes
+import org.apache.spark.sql.{DataFrame, Row, SparkSession}
+
+import java.util
+import scala.io.Source
+
+/**
+ * Offline prediction job for the recsys 61 "nor" XGBoost regression model.
+ * Loads the feature-name list from the classpath, scores testPath samples,
+ * optionally undoes the log transform applied at training time, writes the
+ * results to HDFS, and reports RMSE / MAE / MAPE / RMSLE.
+ */
+object pred_recsys_61_xgb_nor_hdfsfile_20241209 {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    val param = ParamUtils.parseArgs(args)
+    val featureFile = param.getOrElse("featureFile", "20241209_recsys_nor_name.txt")
+    val testPath = param.getOrElse("testPath", "")
+    val labelLogType = param.getOrElse("labelLogType", "0").toInt
+    val labelLogBase = param.getOrElse("labelLogBase", "2").toDouble
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/61_recsys_nor_predict_data/")
+    val featureFilter = param.getOrElse("featureFilter", "XXXXXX").split(",")
+
+    val repartition = param.getOrElse("repartition", "20").toInt
+    val modelPath = param.getOrElse("modelPath", "/dw/recommend/model/61_recsys_nor_model/model_xgb")
+
+    val loader = getClass.getClassLoader
+    val resourceUrl = loader.getResource(featureFile)
+    val content =
+      if (resourceUrl != null) {
+        // Open the resource once and always close it (the previous code
+        // opened a second Source just to close it, leaking the first).
+        val source = Source.fromURL(resourceUrl)
+        try source.getLines().mkString("\n") finally source.close()
+      } else {
+        ""
+      }
+    println(content)
+
+    val features = content.split("\n")
+      .map(r => r.replace(" ", "").replaceAll("\n", ""))
+      // BUG FIX: was `r.nonEmpty || !featureFilter.contains(r)`, which is
+      // always true for non-empty names, so featureFilter never excluded
+      // anything. Keep non-empty names that are NOT in the filter list.
+      .filter(r => r.nonEmpty && !featureFilter.contains(r))
+    println("features.size=" + features.length)
+
+    // Schema: label, one double column per feature, then pass-through
+    // logKey / scoresMap columns echoed into the output file.
+    var fields = Array(
+      DataTypes.createStructField("label", DataTypes.DoubleType, true)
+    ) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))
+    fields = fields ++ Array(
+      DataTypes.createStructField("logKey", DataTypes.StringType, true),
+      DataTypes.createStructField("scoresMap", DataTypes.StringType, true)
+    )
+
+    val schema = DataTypes.createStructType(fields)
+    val vectorAssembler = new VectorAssembler().setInputCols(features).setOutputCol("features")
+
+    val model = XGBoostRegressionModel.load(modelPath)
+    model.setMissing(0.0f).setFeaturesCol("features")
+
+    val testData = createData(
+      sc.textFile(testPath),
+      features
+    )
+    println("recsys nor:test data size:" + testData.count())
+
+    val testDataSet = spark.createDataFrame(testData, schema)
+    val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label", "logKey", "scoresMap")
+    val predictions = model.transform(testDataSetTrans)
+    // Persisted: clipPrediction feeds both the HDFS dump and four metrics.
+    val clipPrediction = getClipData(spark, predictions, labelLogType, labelLogBase).persist()
+
+    val saveData = clipPrediction.select("label", "prediction", "clipPrediction", "logKey", "scoresMap").rdd
+      .map(r => {
+        (r.get(0), r.get(1), r.get(2), r.get(3), r.get(4)).productIterator.mkString("\t")
+      })
+    val hdfsPath = savePath
+    // Guard against wiping an arbitrary path: only delete/write under /dw/recommend/model/.
+    if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+      println("删除路径并开始数据写入:" + hdfsPath)
+      MyHdfsUtils.delete_hdfs_path(hdfsPath)
+      saveData.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+    } else {
+      println("路径不合法,无法写入:" + hdfsPath)
+    }
+
+    val rmseEvaluator = new RegressionEvaluator()
+      .setLabelCol("label")
+      .setPredictionCol("clipPrediction")
+      .setMetricName("rmse")
+    val maeEvaluator = new RegressionEvaluator()
+      .setLabelCol("label")
+      .setPredictionCol("clipPrediction")
+      .setMetricName("mae")
+    val rmse = rmseEvaluator.evaluate(clipPrediction.select("label", "clipPrediction"))
+    val mae = maeEvaluator.evaluate(clipPrediction.select("label", "clipPrediction"))
+    val mape = MetricUtils.calMAPE(clipPrediction.select("label", "clipPrediction").rdd)
+    val rmsle = MetricUtils.calRMSLE(clipPrediction.select("label", "clipPrediction").rdd)
+    printf("recsys nor:rmse: %.6f\n", rmse)
+    printf("recsys nor:mae: %.6f\n", mae)
+    printf("recsys nor:mape: %.6f\n", mape)
+    printf("recsys nor:rmsle: %.6f\n", rmsle)
+
+    println("---------------------------------\n")
+    println("---------------------------------\n")
+  }
+
+  /**
+   * Parses raw "logKey \t label \t scoresMap \t k:v ..." lines into Rows
+   * matching the schema built in main. Missing features default to 0.0.
+   */
+  def createData(data: RDD[String], features: Array[String]): RDD[Row] = {
+    data.map(r => {
+      val line: Array[String] = StringUtils.split(r, '\t')
+      val logKey = line(0)
+      val label: Double = NumberUtils.toDouble(line(1))
+      val scoresMap = line(2)
+      val map: util.Map[String, Double] = new util.HashMap[String, Double]
+      for (i <- 3 until line.length) {
+        val fv: Array[String] = StringUtils.split(line(i), ':')
+        map.put(fv(0), NumberUtils.toDouble(fv(1), 0.0))
+      }
+
+      val v: Array[Any] = new Array[Any](features.length + 3)
+      v(0) = label
+      for (i <- 0 until features.length) {
+        v(i + 1) = map.getOrDefault(features(i), 0.0d)
+      }
+      v(features.length + 1) = logKey
+      v(features.length + 2) = scoresMap
+      Row(v: _*)
+    })
+  }
+
+  /**
+   * Restores log-space predictions to the original label scale and floors
+   * near-zero restored values (< 1E-8) to exactly 0 in clipPrediction.
+   */
+  def getClipData(spark: SparkSession, df: DataFrame, logType: Int, logBase: Double): DataFrame = {
+    import spark.implicits._
+    df.select("label", "prediction", "logKey", "scoresMap").rdd
+      .map(row => {
+        val label = row.getAs[Double]("label")
+        val prediction = MetricUtils.restoreLog(row.getAs[Double]("prediction"), logType, logBase)
+        val logKey = row.getAs[String]("logKey")
+        val scoresMap = row.getAs[String]("scoresMap")
+        if (prediction < 1E-8) {
+          (label, prediction, 0d, logKey, scoresMap)
+        } else {
+          (label, prediction, prediction, logKey, scoresMap)
+        }
+      }
+      ).toDF("label", "prediction", "clipPrediction", "logKey", "scoresMap")
+  }
+}

+ 127 - 0
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/pred_recsys_61_xgb_rov_hdfsfile_20241209.scala

@@ -0,0 +1,127 @@
+package com.tzld.piaoquan.recommend.model
+
+import ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel
+import org.apache.commons.lang.math.NumberUtils
+import org.apache.commons.lang3.StringUtils
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
+import org.apache.spark.ml.feature.VectorAssembler
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.types.DataTypes
+import org.apache.spark.sql.{Row, SparkSession}
+
+import java.util
+import scala.io.Source
+import scala.util.Random
+
+/**
+ * Offline prediction job for the recsys 61 "rov" XGBoost classifier.
+ * Loads the feature-name list from the classpath, scores testPath samples
+ * (optionally down-sampling negatives via negRate), writes predictions to
+ * HDFS, and reports AUC.
+ */
+object pred_recsys_61_xgb_rov_hdfsfile_20241209 {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    val param = ParamUtils.parseArgs(args)
+    val featureFile = param.getOrElse("featureFile", "20241209_recsys_rov_name.txt")
+    val testPath = param.getOrElse("testPath", "")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/61_recsys_rov_predict_data/")
+    val featureFilter = param.getOrElse("featureFilter", "XXXXXX").split(",")
+
+    val negRate = param.getOrElse("negRate", "1.0").toDouble
+    val repartition = param.getOrElse("repartition", "20").toInt
+    val modelPath = param.getOrElse("modelPath", "/dw/recommend/model/61_recsys_rov_model/model_xgb")
+
+    val loader = getClass.getClassLoader
+    val resourceUrl = loader.getResource(featureFile)
+    val content =
+      if (resourceUrl != null) {
+        // Open the resource once and always close it (the previous code
+        // opened a second Source just to close it, leaking the first).
+        val source = Source.fromURL(resourceUrl)
+        try source.getLines().mkString("\n") finally source.close()
+      } else {
+        ""
+      }
+    println(content)
+
+    val features = content.split("\n")
+      .map(r => r.replace(" ", "").replaceAll("\n", ""))
+      // BUG FIX: was `r.nonEmpty || !featureFilter.contains(r)`, which is
+      // always true for non-empty names, so featureFilter never excluded
+      // anything. Keep non-empty names that are NOT in the filter list.
+      .filter(r => r.nonEmpty && !featureFilter.contains(r))
+    println("features.size=" + features.length)
+
+    // Schema: label, one double column per feature, then pass-through
+    // logKey / scoresMap columns echoed into the output file.
+    var fields = Array(
+      DataTypes.createStructField("label", DataTypes.IntegerType, true)
+    ) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))
+    fields = fields ++ Array(
+      DataTypes.createStructField("logKey", DataTypes.StringType, true),
+      DataTypes.createStructField("scoresMap", DataTypes.StringType, true)
+    )
+
+    val schema = DataTypes.createStructType(fields)
+    val vectorAssembler = new VectorAssembler().setInputCols(features).setOutputCol("features")
+
+    val model = XGBoostClassificationModel.load(modelPath)
+    model.setMissing(0.0f).setFeaturesCol("features")
+
+    val testData = createData(
+      negRate,
+      sc.textFile(testPath),
+      features
+    )
+
+    val testDataSet = spark.createDataFrame(testData, schema)
+    val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label", "logKey", "scoresMap")
+    val predictions = model.transform(testDataSetTrans)
+
+    val saveData = predictions.select("label", "rawPrediction", "probability", "logKey", "scoresMap").rdd
+      .map(r => {
+        (r.get(0), r.get(1), r.get(2), r.get(3), r.get(4)).productIterator.mkString("\t")
+      })
+    val hdfsPath = savePath
+    // Guard against wiping an arbitrary path: only delete/write under /dw/recommend/model/.
+    if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+      println("删除路径并开始数据写入:" + hdfsPath)
+      MyHdfsUtils.delete_hdfs_path(hdfsPath)
+      saveData.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+    } else {
+      println("路径不合法,无法写入:" + hdfsPath)
+    }
+
+    val evaluator = new BinaryClassificationEvaluator()
+      .setLabelCol("label")
+      .setRawPredictionCol("probability")
+      .setMetricName("areaUnderROC")
+    val auc = evaluator.evaluate(predictions.select("label", "probability"))
+    println("recsys rov:auc:" + auc)
+
+    println("---------------------------------\n")
+    println("---------------------------------\n")
+  }
+
+  /**
+   * Parses raw "logKey \t label \t scoresMap \t k:v ..." lines into Rows.
+   * Positives are always kept; negatives are kept with probability negRate.
+   * The line is split once (the original split in both filter and map),
+   * and the shared Random object replaces a per-record `new Random()`.
+   */
+  def createData(negRate: Double, data: RDD[String], features: Array[String]): RDD[Row] = {
+    data.map(r => StringUtils.split(r, '\t'))
+      .filter(line => {
+        val label: Int = NumberUtils.toInt(line(1))
+        label > 0 || Random.nextDouble() <= negRate
+      })
+      .map(line => {
+        val logKey = line(0)
+        val label: Int = NumberUtils.toInt(line(1))
+        val scoresMap = line(2)
+        val map: util.Map[String, Double] = new util.HashMap[String, Double]
+        for (i <- 3 until line.length) {
+          val fv: Array[String] = StringUtils.split(line(i), ':')
+          map.put(fv(0), NumberUtils.toDouble(fv(1), 0.0))
+        }
+
+        val v: Array[Any] = new Array[Any](features.length + 3)
+        v(0) = label
+        for (i <- 0 until features.length) {
+          v(i + 1) = map.getOrDefault(features(i), 0.0d)
+        }
+        v(features.length + 1) = logKey
+        v(features.length + 2) = scoresMap
+        Row(v: _*)
+      })
+  }
+}

+ 94 - 0
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/rov_offline_ab_auc.scala

@@ -0,0 +1,94 @@
+package com.tzld.piaoquan.recommend.model
+
+import com.alibaba.fastjson.JSON
+import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{DataFrame, SparkSession}
+
+/**
+ * Offline A/B AUC comparison: splits logged samples into a base and an
+ * experiment group by abcode, reads each group's model score out of the
+ * scoresMap JSON, and prints per-group sample count and AUC.
+ */
+object rov_offline_ab_auc {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    val param = ParamUtils.parseArgs(args)
+    val testPath = param.getOrElse("testPath", "")
+    val whatApps = param.getOrElse("whatApps", "0,3,4,21,17").split(",").toSet
+    val baseAbCodes = param.getOrElse("baseAbCodes", "ab3,ab4,ab8,ab9").split(",").toSet
+    val expAbCodes = param.getOrElse("expAbCodes", "ab0,ab1,ab2,ab5,ab6,ab7").split(",").toSet
+    val baseScore = param.getOrElse("baseScore", "fmRov")
+    val expScore = param.getOrElse("expScore", "fmRov")
+
+    val testData = loadData(whatApps, sc.textFile(testPath))
+    val baseData = getSubData(spark, baseAbCodes, baseScore, testData)
+    val expData = getSubData(spark, expAbCodes, expScore, testData)
+
+    val evaluator = new BinaryClassificationEvaluator()
+      .setLabelCol("label")
+      .setRawPredictionCol("score")
+      .setMetricName("areaUnderROC")
+    val baseCnt = baseData.count()
+    val expCnt = expData.count()
+    val baseAuc = evaluator.evaluate(baseData.select("label", "score"))
+    val expAuc = evaluator.evaluate(expData.select("label", "score"))
+    printf("base count: %d, auc: %.6f\n", baseCnt, baseAuc)
+    printf("exp count: %d, auc: %.6f\n", expCnt, expAuc)
+    println("---------------------------------\n")
+    println("---------------------------------\n")
+  }
+
+
+  /** Parses "logKey \t label \t scoresMap \t ..." lines and keeps only
+   *  samples from allowed app types. */
+  def loadData(whatApps: Set[String], data: RDD[String]): RDD[(String, Double, String)] = {
+    data
+      .map(r => {
+        // logKey + "\t" + label + "\t" + scoresMap + "\t" + featuresBucket.mkString("\t")
+        val rList = r.split("\t")
+        val logKey = rList(0)
+        val label = rList(1).toDouble
+        val scoresMap = rList(2)
+        (logKey, label, scoresMap)
+      })
+      .filter(raw => {
+        validApp(raw._1, whatApps)
+      })
+  }
+
+  /** True when the sample's apptype (first logKey field) is allowed. */
+  private def validApp(logKey: String, whatApps: Set[String]): Boolean = {
+    // logKey: apptype, page, pagesource, recommendpagetype, flowpool, abcode, mid, vid, level, ts
+    // Only apptype is checked; the other fields were unused locals.
+    val cells = logKey.split(",")
+    whatApps.contains(cells(0))
+  }
+
+  /** Reads `key` out of the scoresMap JSON; default -2 marks "missing". */
+  private def parseScore(data: String, key: String, default: String = "-2"): Double = {
+    JSON.parseObject(data).getOrDefault(key, default).toString.toDouble
+  }
+
+  /**
+   * Restricts samples to the given ab codes and extracts the chosen score.
+   * The score JSON is parsed once per row (was parsed in both filter and
+   * map); rows whose score is missing (<= -1 sentinel) are dropped.
+   */
+  private def getSubData(spark: SparkSession, abCodes: Set[String], whatScore: String, data: RDD[(String, Double, String)]): DataFrame = {
+    import spark.implicits._
+    data
+      .flatMap(raw => {
+        val cells = raw._1.split(",")
+        if (abCodes.contains(cells(5))) {
+          val score = parseScore(raw._3, whatScore)
+          if (score > -1) Some((raw._1, raw._2, score)) else None
+        } else {
+          None
+        }
+      })
+      .toDF("logKey", "label", "score")
+  }
+}

+ 71 - 0
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/stat_qq.scala

@@ -0,0 +1,71 @@
+package com.tzld.piaoquan.recommend.model
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.expressions.Window
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.{DataFrame, SparkSession}
+
+
+/**
+ * Calibration (Q-Q style) report: buckets predictions into bucketNum
+ * quantiles and, per bucket, writes min/max/avg prediction, avg label
+ * and sample count as a CSV to HDFS.
+ */
+object stat_qq {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // param
+    val param = ParamUtils.parseArgs(args)
+    val predictPath = param.getOrElse("predictPath", "/dw/recommend/model/general_model/ad_post_conver/eval")
+    val bucketNum = param.getOrElse("bucketNum", "60").toInt
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/general_model/ad_post_conver/qq")
+
+    // data
+    val predictDF = loadData(spark, sc.textFile(predictPath))
+
+    // process
+    // NOTE(review): an unpartitioned Window.orderBy forces all rows into a
+    // single partition for ntile — fine for eval-sized data, confirm for
+    // larger inputs.
+    import spark.implicits._
+    val bucketDF = predictDF.withColumn("bucketId", ntile(bucketNum).over(Window.orderBy("predict")))
+      .select($"predict", $"label", $"bucketId")
+      .groupBy("bucketId")
+      .agg(min("predict").alias("min"), max("predict").as("max"), round(avg("predict"), 6).as("predict"), round(avg("label"), 6).as("real"), count("label").as("cnt"))
+      .orderBy("bucketId")
+
+    // save
+    val hdfsPath = savePath
+    // Guard against wiping an arbitrary path: only delete/write under /dw/recommend/model/.
+    if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+      println("删除路径并开始数据写入:" + hdfsPath)
+      MyHdfsUtils.delete_hdfs_path(hdfsPath)
+      bucketDF.write.format("csv").option("header", "true").save(hdfsPath)
+    } else {
+      println("路径不合法,无法写入:" + hdfsPath)
+    }
+  }
+
+  /** Extracts the positive-class probability from a "[neg,pos]" vector
+   *  string; "-1" marks an unparseable value. */
+  private def parseScore(data: String): String = {
+    if (data.nonEmpty) {
+      val pair = data.replace("[", "").replace("]", "").split(",")
+      if (pair.length > 1) {
+        return pair(1).toDouble.formatted("%.6f")
+      }
+    }
+    "-1"
+  }
+
+  /**
+   * Parses "label \t logit \t probabilityVector \t mid" eval lines into a
+   * (predict, label, logKey) DataFrame, dropping rows whose probability
+   * could not be parsed (-1 sentinel).
+   */
+  def loadData(spark: SparkSession, data: RDD[String]): DataFrame = {
+    import spark.implicits._
+    data
+      .map(r => {
+        val cells = r.split("\t")
+        val label = cells(0)
+        // cells(1) is the raw logit; it is not used by this report.
+        val score = parseScore(cells(2)).toDouble
+        val mid = cells(3)
+        (score, label, mid)
+      })
+      .filter(raw => {
+        raw._1 > -0.1
+      })
+      .toDF("predict", "label", "logKey")
+  }
+}

+ 121 - 0
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/train_profile_gender_xgb_20251114.scala

@@ -0,0 +1,121 @@
+package com.tzld.piaoquan.recommend.model
+
+import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
+import org.apache.commons.lang.math.NumberUtils
+import org.apache.commons.lang3.StringUtils
+import org.apache.spark.ml.feature.VectorAssembler
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.types.DataTypes
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+
+import java.util
+import scala.io.Source
+
+/**
+ * Training job for the user-profile gender XGBoost classifier.
+ * Reads "mid \t label \t k:v ..." samples, filters low-coverage mids,
+ * fits a binary classifier and saves it to HDFS.
+ */
+object train_profile_gender_xgb_20251114 {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    val param = ParamUtils.parseArgs(args)
+    val trainPath = param.getOrElse("trainPath", "/dw/recommend/model/user_profile/gender/sample/train/2025_y")
+    val featureFile = param.getOrElse("featureFile", "20241209_recsys_nor_name.txt")
+    val minCnt = param.getOrElse("minCnt", "10").toDouble
+    val minFeatCnt = param.getOrElse("minFeatCnt", "1").toDouble
+    val eta = param.getOrElse("eta", "0.01").toDouble
+    val gamma = param.getOrElse("gamma", "0.0").toDouble
+    val max_depth = param.getOrElse("max_depth", "5").toInt
+    val num_round = param.getOrElse("num_round", "100").toInt
+    val num_worker = param.getOrElse("num_worker", "20").toInt
+    val func_object = param.getOrElse("func_object", "binary:logistic")
+    val func_metric = param.getOrElse("func_metric", "auc")
+    val modelPath = param.getOrElse("modelPath", "/dw/recommend/model/user_profile/gender/model/model_xgb")
+
+    val featureNames = loadFeatureNames(featureFile)
+    val samples = createData(minCnt, minFeatCnt, sc.textFile(trainPath), featureNames)
+    println("profile gender:train data size:" + samples.count())
+
+    // Schema: integer label followed by one double column per feature.
+    val labelField = DataTypes.createStructField("label", DataTypes.IntegerType, true)
+    val featureFields = featureNames.map(n => DataTypes.createStructField(n, DataTypes.DoubleType, true))
+    val schema = DataTypes.createStructType(labelField +: featureFields)
+
+    val frame: Dataset[Row] = spark.createDataFrame(samples, schema)
+    val assembler = new VectorAssembler().setInputCols(featureNames).setOutputCol("features")
+    // Persisted: XGBoost iterates over the assembled input many times.
+    val trainInput = assembler.transform(frame).select("features", "label").persist()
+    val classifier = new XGBoostClassifier()
+      .setEta(eta)
+      .setGamma(gamma)
+      .setMissing(0.0f)
+      .setMaxDepth(max_depth)
+      .setNumRound(num_round)
+      .setSubsample(0.8)
+      .setColsampleBytree(0.8)
+      .setScalePosWeight(1)
+      .setObjective(func_object)
+      .setEvalMetric(func_metric)
+      .setFeaturesCol("features")
+      .setLabelCol("label")
+      .setNthread(1)
+      .setNumWorkers(num_worker)
+      .setSeed(2024)
+      .setMinChildWeight(1)
+    val fitted = classifier.fit(trainInput)
+    if (modelPath.nonEmpty) {
+      fitted.write.overwrite.save(modelPath)
+    }
+  }
+
+  /**
+   * Parses "mid \t label \t k:v \t k:v ..." lines into (label, feature...)
+   * Rows. Keeps only mids whose "cnt" feature is at least minCnt and that
+   * carry at least minFeatCnt of the model's feature names.
+   */
+  def createData(minCnt: Double, minFeatCnt: Double, data: RDD[String], features: Array[String]): RDD[Row] = {
+    val knownFeatures = features.toSet
+    data
+      .map { line =>
+        val cols: Array[String] = StringUtils.split(line, '\t')
+        val mid = cols(0)
+        val label = NumberUtils.toInt(cols(1))
+        val feats: util.Map[String, Double] = new util.HashMap[String, Double]
+        var matched = 0
+        var i = 2
+        while (i < cols.length) {
+          val kv = StringUtils.split(cols(i), ':')
+          feats.put(kv(0), NumberUtils.toDouble(kv(1), 0.0))
+          if (knownFeatures.contains(kv(0))) {
+            matched += 1
+          }
+          i += 1
+        }
+        (mid, label, feats, matched)
+      }
+      .filter { case (_, _, feats, matched) =>
+        feats.getOrDefault("cnt", 0.0d) >= minCnt && matched >= minFeatCnt
+      }
+      .map { case (_, label, feats, _) =>
+        val row = new Array[Any](features.length + 1)
+        row(0) = label
+        var i = 0
+        while (i < features.length) {
+          row(i + 1) = feats.getOrDefault(features(i), 0.0d)
+          i += 1
+        }
+        Row(row: _*)
+      }
+  }
+
+  /** Reads one feature name per line from a local file, stripping blanks. */
+  def loadFeatureNames(nameFile: String): Array[String] = {
+    val source = Source.fromFile(nameFile)
+    val raw = try source.getLines().mkString("\n") finally source.close()
+    val featArray = raw.split("\n")
+      .map(_.replace(" ", "").replaceAll("\n", ""))
+      .filter(_.nonEmpty)
+    println("featArray.size=" + featArray.length)
+    println(featArray.mkString(","))
+    featArray
+  }
+}

+ 161 - 0
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/train_recsys_61_xgb_nor_20241209.scala

@@ -0,0 +1,161 @@
+package com.tzld.piaoquan.recommend.model
+
+import ml.dmlc.xgboost4j.scala.spark.XGBoostRegressor
+import org.apache.commons.lang.math.NumberUtils
+import org.apache.commons.lang3.StringUtils
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.ml.evaluation.RegressionEvaluator
+import org.apache.spark.ml.feature.VectorAssembler
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.types.DataTypes
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+
+import java.util
+import scala.io.Source
+
object train_recsys_61_xgb_nor_20241209 {

  /**
   * Trains an XGBoost regressor for the recsys "nor" model. Reads tab-separated
   * training samples from HDFS, optionally evaluates on a test set (RMSE) and
   * writes (label, prediction) pairs back to HDFS. All hyper-parameters come
   * from key=value command-line arguments parsed by ParamUtils.
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName(this.getClass.getName)
      .getOrCreate()
    val sc = spark.sparkContext

    val param = ParamUtils.parseArgs(args)
    val featureFile = param.getOrElse("featureFile", "20241209_recsys_nor_name.txt")
    val trainPath = param.getOrElse("trainPath", "/dw/recommend/model/61_recsys_nor_train_data/20241210")
    val testPath = param.getOrElse("testPath", "")
    val savePath = param.getOrElse("savePath", "/dw/recommend/model/61_recsys_nor_predict_data/")
    val featureFilter = param.getOrElse("featureFilter", "XXXXXX").split(",")
    val labelLogType = param.getOrElse("labelLogType", "0").toInt
    val labelLogBase = param.getOrElse("labelLogBase", "2").toDouble
    val eta = param.getOrElse("eta", "0.01").toDouble
    val gamma = param.getOrElse("gamma", "0.0").toDouble
    val max_depth = param.getOrElse("max_depth", "5").toInt
    val num_round = param.getOrElse("num_round", "100").toInt
    val num_worker = param.getOrElse("num_worker", "20").toInt
    val func_object = param.getOrElse("func_object", "reg:squaredlogerror")
    val func_metric = param.getOrElse("func_metric", "rmsle")
    val repartition = param.getOrElse("repartition", "20").toInt
    val modelPath = param.getOrElse("modelPath", "/dw/recommend/model/61_recsys_nor_model/model_xgb")
    val modelFile = param.getOrElse("modelFile", "model_xgb_for_recsys_nor.tar.gz")

    // Load the feature-name list from the classpath. BUG FIX: open the resource
    // once and close it in a finally block — the original opened a second Source
    // just to call close(), leaking the stream that was actually read.
    val loader = getClass.getClassLoader
    val resourceUrl = loader.getResource(featureFile)
    val content =
      if (resourceUrl != null) {
        val source = Source.fromURL(resourceUrl)
        try source.getLines().mkString("\n")
        finally source.close()
      } else {
        ""
      }
    println(content)

    // BUG FIX: was `r.nonEmpty || !featureFilter.contains(r)`, which keeps every
    // non-empty name and therefore never applies featureFilter at all.
    val features = content.split("\n")
      .map(r => r.replace(" ", "").replaceAll("\n", ""))
      .filter(r => r.nonEmpty && !featureFilter.contains(r))
    println("features.size=" + features.length)

    val trainData = createData(
      labelLogType,
      labelLogBase,
      sc.textFile(trainPath),
      features
    )
    println("recsys nor:train data size:" + trainData.count())

    // Schema: label first, then one Double column per feature name.
    val fields = Array(
      DataTypes.createStructField("label", DataTypes.DoubleType, true)
    ) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))

    val schema = DataTypes.createStructType(fields)
    val trainDataSet: Dataset[Row] = spark.createDataFrame(trainData, schema)
    val vectorAssembler = new VectorAssembler().setInputCols(features).setOutputCol("features")
    // persist(): the assembled input is scanned once per boosting round.
    val xgbInput = vectorAssembler.transform(trainDataSet).select("features", "label").persist()
    val xgbRegressor = new XGBoostRegressor()
      .setEta(eta)
      .setGamma(gamma)
      .setMissing(0.0f)
      .setMaxDepth(max_depth)
      .setNumRound(num_round)
      .setSubsample(0.8)
      .setColsampleBytree(0.8)
      .setObjective(func_object)
      .setEvalMetric(func_metric)
      .setFeaturesCol("features")
      .setLabelCol("label")
      .setNthread(1)
      .setNumWorkers(num_worker)
      .setSeed(2024)
      .setMinChildWeight(1)
    val model = xgbRegressor.fit(xgbInput)

    if (modelPath.nonEmpty && modelFile.nonEmpty) {
      model.write.overwrite.save(modelPath)
      // val gzPath = modelPath + "/" + modelFile
      // CompressUtil.compressDirectoryToGzip(modelPath, gzPath)
    }

    if (testPath.nonEmpty) {
      val testData = createData(
        labelLogType,
        labelLogBase,
        sc.textFile(testPath),
        features
      )
      val testDataSet = spark.createDataFrame(testData, schema)
      val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label")
      val predictions = model.transform(testDataSetTrans)

      println("recsys nor:columns:" + predictions.columns.mkString(",")) //[label, features, prediction]
      val saveData = predictions.select("label", "prediction").rdd
        .map(r => {
          (r.get(0), r.get(1)).productIterator.mkString("\t")
        })
      val hdfsPath = savePath
      // Only delete-and-write under the model root, to avoid wiping arbitrary paths.
      if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
        println("删除路径并开始数据写入:" + hdfsPath)
        MyHdfsUtils.delete_hdfs_path(hdfsPath)
        saveData.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
      } else {
        println("路径不合法,无法写入:" + hdfsPath)
      }
      val evaluator = new RegressionEvaluator()
        .setLabelCol("label")
        .setPredictionCol("prediction")
        .setMetricName("rmse")
      val rmse = evaluator.evaluate(predictions.select("label", "prediction"))
      println("recsys nor: rmse:" + rmse)
    }
  }

  /**
   * Parses "logKey \t label \t scores \t k:v ..." lines into Rows of
   * (log-scaled label, feature values...). Lines with 10 or fewer cells are dropped.
   *
   * @param logType  scaling mode forwarded to MetricUtils.logScale
   * @param logBase  log base used when scaling the label
   * @param data     raw tab-separated sample lines
   * @param features feature names, in output column order (missing features -> 0.0)
   */
  def createData(logType: Int, logBase: Double, data: RDD[String], features: Array[String]): RDD[Row] = {
    data
      .filter(r => {
        val line: Array[String] = StringUtils.split(r, '\t')
        line.length > 10
      })
      .map(r => {
        val line: Array[String] = StringUtils.split(r, '\t')
        // line(0) is the log key and line(2) the upstream score map; both unused here.
        val label: Double = NumberUtils.toDouble(line(1))
        val map: util.Map[String, Double] = new util.HashMap[String, Double]
        for (i <- 3 until line.length) {
          val fv: Array[String] = StringUtils.split(line(i), ':')
          map.put(fv(0), NumberUtils.toDouble(fv(1), 0.0))
        }

        val v: Array[Any] = new Array[Any](features.length + 1)
        v(0) = MetricUtils.logScale(label, logType, logBase)
        for (i <- features.indices) {
          v(i + 1) = map.getOrDefault(features(i), 0.0d)
        }
        Row(v: _*)
      })
  }
}

+ 162 - 0
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/train_recsys_61_xgb_rov_20241209.scala

@@ -0,0 +1,162 @@
+package com.tzld.piaoquan.recommend.model
+
+import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
+import org.apache.commons.lang.math.NumberUtils
+import org.apache.commons.lang3.StringUtils
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
+import org.apache.spark.ml.feature.VectorAssembler
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.types.DataTypes
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+
+import java.util
+import scala.io.Source
+import scala.util.Random
+
object train_recsys_61_xgb_rov_20241209 {

  /**
   * Trains an XGBoost binary classifier for the recsys "rov" model. Reads
   * tab-separated samples from HDFS (down-sampling negatives by negRate),
   * optionally evaluates on a test set (AUC) and writes predictions to HDFS.
   * All hyper-parameters come from key=value command-line arguments.
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName(this.getClass.getName)
      .getOrCreate()
    val sc = spark.sparkContext

    val param = ParamUtils.parseArgs(args)
    val featureFile = param.getOrElse("featureFile", "20241209_recsys_rov_name.txt")
    val trainPath = param.getOrElse("trainPath", "/dw/recommend/model/61_recsys_rov_train_data/20241210")
    val testPath = param.getOrElse("testPath", "")
    val savePath = param.getOrElse("savePath", "/dw/recommend/model/61_recsys_rov_predict_data/")
    val featureFilter = param.getOrElse("featureFilter", "XXXXXX").split(",")
    val eta = param.getOrElse("eta", "0.01").toDouble
    val gamma = param.getOrElse("gamma", "0.0").toDouble
    val max_depth = param.getOrElse("max_depth", "5").toInt
    val num_round = param.getOrElse("num_round", "100").toInt
    val num_worker = param.getOrElse("num_worker", "20").toInt
    val func_object = param.getOrElse("func_object", "binary:logistic")
    val func_metric = param.getOrElse("func_metric", "auc")
    val repartition = param.getOrElse("repartition", "20").toInt
    val negRate = param.getOrElse("negRate", "1.0").toDouble
    val modelPath = param.getOrElse("modelPath", "/dw/recommend/model/61_recsys_rov_model/model_xgb")
    val modelFile = param.getOrElse("modelFile", "model_xgb_for_recsys_rov.tar.gz")

    // Load the feature-name list from the classpath. BUG FIX: open the resource
    // once and close it in a finally block — the original opened a second Source
    // just to call close(), leaking the stream that was actually read.
    val loader = getClass.getClassLoader
    val resourceUrl = loader.getResource(featureFile)
    val content =
      if (resourceUrl != null) {
        val source = Source.fromURL(resourceUrl)
        try source.getLines().mkString("\n")
        finally source.close()
      } else {
        ""
      }
    println(content)

    // BUG FIX: was `r.nonEmpty || !featureFilter.contains(r)`, which keeps every
    // non-empty name and therefore never applies featureFilter at all.
    val features = content.split("\n")
      .map(r => r.replace(" ", "").replaceAll("\n", ""))
      .filter(r => r.nonEmpty && !featureFilter.contains(r))
    println("features.size=" + features.length)

    val trainData = createData(
      negRate,
      sc.textFile(trainPath),
      features
    )
    println("recsys rov:train data size:" + trainData.count())

    // Schema: integer label first, then one Double column per feature name.
    val fields = Array(
      DataTypes.createStructField("label", DataTypes.IntegerType, true)
    ) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))

    val schema = DataTypes.createStructType(fields)
    val trainDataSet: Dataset[Row] = spark.createDataFrame(trainData, schema)
    val vectorAssembler = new VectorAssembler().setInputCols(features).setOutputCol("features")
    val xgbInput = vectorAssembler.transform(trainDataSet).select("features", "label")
    val xgbClassifier = new XGBoostClassifier()
      .setEta(eta)
      .setGamma(gamma)
      .setMissing(0.0f)
      .setMaxDepth(max_depth)
      .setNumRound(num_round)
      .setSubsample(0.8)
      .setColsampleBytree(0.8)
      .setScalePosWeight(1)
      .setObjective(func_object)
      .setEvalMetric(func_metric)
      .setFeaturesCol("features")
      .setLabelCol("label")
      .setNthread(1)
      .setNumWorkers(num_worker)
      .setSeed(2024)
      .setMinChildWeight(1)
    val model = xgbClassifier.fit(xgbInput)

    if (modelPath.nonEmpty && modelFile.nonEmpty) {
      model.write.overwrite.save(modelPath)
      //      val gzPath = modelPath + "/" + modelFile
      //      CompressUtil.compressDirectoryToGzip(modelPath, gzPath)
    }

    if (testPath.nonEmpty) {
      // Evaluation uses the full test set: negRate = 1.0 keeps every negative.
      val testData = createData(
        1.0,
        sc.textFile(testPath),
        features
      )
      val testDataSet = spark.createDataFrame(testData, schema)
      val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features", "label")
      val predictions = model.transform(testDataSetTrans)

      println("recsys rov:columns:" + predictions.columns.mkString(",")) //[label, features, probability, prediction, rawPrediction]
      val saveData = predictions.select("label", "rawPrediction", "probability").rdd
        .map(r => {
          (r.get(0), r.get(1), r.get(2)).productIterator.mkString("\t")
        })
      val hdfsPath = savePath
      // Only delete-and-write under the model root, to avoid wiping arbitrary paths.
      if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
        println("删除路径并开始数据写入:" + hdfsPath)
        MyHdfsUtils.delete_hdfs_path(hdfsPath)
        saveData.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
      } else {
        println("路径不合法,无法写入:" + hdfsPath)
      }

      val evaluator = new BinaryClassificationEvaluator()
        .setLabelCol("label")
        .setRawPredictionCol("probability")
        .setMetricName("areaUnderROC")
      val auc = evaluator.evaluate(predictions.select("label", "probability"))
      println("recsys rov:auc:" + auc)
    }
  }

  /**
   * Parses "logKey \t label \t scores \t k:v ..." lines into Rows of
   * (label, feature values...). Lines with 10 or fewer cells are dropped,
   * and negatives (label == 0) are kept only with probability negRate.
   *
   * @param negRate  sampling rate in (0, 1] applied to negative samples
   * @param data     raw tab-separated sample lines
   * @param features feature names, in output column order (missing features -> 0.0)
   */
  def createData(negRate: Double, data: RDD[String], features: Array[String]): RDD[Row] = {
    data.filter(r => {
        val line: Array[String] = StringUtils.split(r, '\t')
        // BUG FIX: check the cell count before indexing line(1); the original
        // read line(1) first and threw ArrayIndexOutOfBoundsException on
        // short/malformed lines.
        line.length > 10 && {
          val label: Int = NumberUtils.toInt(line(1))
          label > 0 || new Random().nextDouble() <= negRate
        }
      })
      .map(r => {
        val line: Array[String] = StringUtils.split(r, '\t')
        // line(0) is the log key and line(2) the upstream score map; both unused here.
        val label: Int = NumberUtils.toInt(line(1))
        val map: util.Map[String, Double] = new util.HashMap[String, Double]
        for (i <- 3 until line.length) {
          val fv: Array[String] = StringUtils.split(line(i), ':')
          map.put(fv(0), NumberUtils.toDouble(fv(1), 0.0))
        }

        val v: Array[Any] = new Array[Any](features.length + 1)
        v(0) = label
        for (i <- features.indices) {
          v(i + 1) = map.getOrDefault(features(i), 0.0d)
        }
        Row(v: _*)
      })
  }
}