Browse Source

feat:添加分桶脚本

zhaohaipeng 2 tháng trước
mục cha
commit
b1c53d403a

+ 160 - 0
src/main/resources/final_feature_1.txt

@@ -0,0 +1,160 @@
+b13_is_return_1_1h
+b13_is_return_1_3h
+b13_is_return_1_24h
+b13_is_return_1_168h
+b13_is_share_1h
+b13_is_share_3h
+b13_is_share_24h
+b13_is_share_168h
+b13_rovn*log(r)_1h
+b13_rovn*log(r)_3h
+b13_rovn*log(r)_24h
+b13_rovn*log(r)_168h
+b13_str_1h
+b13_str_3h
+b13_str_24h
+b13_str_168h
+b13_str_one_1h
+b13_str_one_3h
+b13_str_one_24h
+b13_str_one_168h
+b13_str_plus_1h
+b13_str_plus_3h
+b13_str_plus_24h
+b13_str_plus_168h
+c1_click_12h
+c1_click_24h
+c1_click_72h
+c1_click_168h
+c1_is_return_1_12h
+c1_is_return_1_24h
+c1_is_return_1_72h
+c1_is_return_1_168h
+c1_is_share_12h
+c1_is_share_24h
+c1_is_share_72h
+c1_is_share_168h
+c1_rovn*log(r)_12h
+c1_rovn*log(r)_24h
+c1_rovn*log(r)_72h
+c1_rovn*log(r)_168h
+c1_str_12h
+c1_str_24h
+c1_str_72h
+c1_str_168h
+c1_str_one_12h
+c1_str_one_24h
+c1_str_one_72h
+c1_str_one_168h
+c1_str_plus_12h
+c1_str_plus_24h
+c1_str_plus_72h
+c1_str_plus_168h
+c2_click_12h
+c2_click_168h
+c2_click_24h
+c2_click_72h
+c2_is_return_1_12h
+c2_is_return_1_168h
+c2_is_return_1_24h
+c2_is_return_1_72h
+c2_is_share_12h
+c2_is_share_168h
+c2_is_share_24h
+c2_is_share_72h
+c2_return_n_uv_12h
+c2_return_n_uv_168h
+c2_return_n_uv_24h
+c2_return_n_uv_72h
+c2_share_cnt_12h
+c2_share_cnt_168h
+c2_share_cnt_24h
+c2_share_cnt_72h
+c3_click_12h
+c3_click_168h
+c3_click_24h
+c3_click_72h
+c3_is_return_1_12h
+c3_is_return_1_168h
+c3_is_return_1_24h
+c3_is_return_1_72h
+c3_is_share_12h
+c3_is_share_168h
+c3_is_share_24h
+c3_is_share_72h
+c3_return_n_uv_12h
+c3_return_n_uv_168h
+c3_return_n_uv_24h
+c3_return_n_uv_72h
+c3_share_cnt_12h
+c3_share_cnt_168h
+c3_share_cnt_24h
+c3_share_cnt_72h
+c4_avg_rovn_24h
+c4_avg_rovn_72h
+c4_avg_rovn_168h
+c4_avg_str_24h
+c4_avg_str_72h
+c4_avg_str_168h
+c4_avg_str_one_24h
+c4_avg_str_one_72h
+c4_avg_str_one_168h
+c4_avg_str_plus_24h
+c4_avg_str_plus_72h
+c4_avg_str_plus_168h
+c4_diff_rovn_24h
+c4_diff_rovn_72h
+c4_diff_rovn_168h
+c4_diff_str_24h
+c4_diff_str_72h
+c4_diff_str_168h
+c4_diff_str_one_24h
+c4_diff_str_one_72h
+c4_diff_str_one_168h
+c4_diff_str_plus_24h
+c4_diff_str_plus_72h
+c4_diff_str_plus_168h
+c5_avgscore_tags_1d
+c5_avgscore_tags_3d
+c5_avgscore_tags_7d
+c5_matchnum_tags_1d
+c5_matchnum_tags_3d
+c5_matchnum_tags_7d
+c5_maxscore_tags_1d
+c5_maxscore_tags_3d
+c5_maxscore_tags_7d
+c6_avgscore_tags_1d
+c6_avgscore_tags_3d
+c6_avgscore_tags_7d
+c6_matchnum_tags_1d
+c6_matchnum_tags_3d
+c6_matchnum_tags_7d
+c6_maxscore_tags_1d
+c6_maxscore_tags_3d
+c6_maxscore_tags_7d
+d1_ros_cf_rank
+d1_ros_cf_score
+d1_rov_cf_rank
+d1_rov_cf_score
+d2_rank
+d2_score
+d3_exp
+d3_return_n
+d3_rovn
+total_time
+width
+height
+width/height
+size
+bit_rate
+is_greeting
+festive_sim
+head_title_festive_sim
+head_title_merge1_sim
+head_title_merge2_sim
+merge1_sim
+merge2_sim
+title_sim
+day_of_week
+hour
+create_ts_diff

+ 162 - 0
src/main/resources/final_feature_2.txt

@@ -0,0 +1,162 @@
+b1_is_return_1_1h
+b1_is_return_1_3h
+b1_is_return_1_24h
+b1_is_return_1_168h
+b1_is_share_1h
+b1_is_share_3h
+b1_is_share_24h
+b1_is_share_168h
+b1_rovn*log(r)_1h
+b1_rovn*log(r)_3h
+b1_rovn*log(r)_24h
+b1_rovn*log(r)_168h
+b1_str_1h
+b1_str_3h
+b1_str_24h
+b1_str_168h
+b1_str_one_1h
+b1_str_one_3h
+b1_str_one_24h
+b1_str_one_168h
+b1_str_plus_1h
+b1_str_plus_3h
+b1_str_plus_24h
+b1_str_plus_168h
+b2_is_return_1_1h
+b2_is_return_1_3h
+b2_is_return_1_24h
+b2_is_return_1_168h
+b2_is_share_1h
+b2_is_share_3h
+b2_is_share_24h
+b2_is_share_168h
+b2_rovn*log(r)_1h
+b2_rovn*log(r)_3h
+b2_rovn*log(r)_24h
+b2_rovn*log(r)_168h
+b2_str_1h
+b2_str_3h
+b2_str_24h
+b2_str_168h
+b2_str_one_1h
+b2_str_one_3h
+b2_str_one_24h
+b2_str_one_168h
+b2_str_plus_1h
+b2_str_plus_3h
+b2_str_plus_24h
+b2_str_plus_168h
+b3_is_return_1_24h
+b3_is_return_1_168h
+b3_is_share_24h
+b3_is_share_168h
+b3_rovn*log(r)_24h
+b3_rovn*log(r)_168h
+b3_str_24h
+b3_str_168h
+b3_str_one_24h
+b3_str_one_168h
+b3_str_plus_24h
+b3_str_plus_168h
+b4_is_return_1_6h
+b4_is_return_1_24h
+b4_is_share_6h
+b4_is_share_24h
+b4_rovn*log(r)_6h
+b4_rovn*log(r)_24h
+b4_str_6h
+b4_str_24h
+b4_str_one_6h
+b4_str_one_24h
+b4_str_plus_6h
+b4_str_plus_24h
+b5_is_return_1_6h
+b5_is_return_1_24h
+b5_is_share_6h
+b5_is_share_24h
+b5_rovn*log(r)_6h
+b5_rovn*log(r)_24h
+b5_str_6h
+b5_str_24h
+b5_str_one_6h
+b5_str_one_24h
+b5_str_plus_6h
+b5_str_plus_24h
+b6_is_return_1_6h
+b6_is_return_1_24h
+b6_is_share_6h
+b6_is_share_24h
+b6_rovn*log(r)_6h
+b6_rovn*log(r)_24h
+b6_str_6h
+b6_str_24h
+b6_str_one_6h
+b6_str_one_24h
+b6_str_plus_6h
+b6_str_plus_24h
+b7_is_return_1_6h
+b7_is_return_1_24h
+b7_is_share_6h
+b7_is_share_24h
+b7_rovn*log(r)_6h
+b7_rovn*log(r)_24h
+b7_str_6h
+b7_str_24h
+b7_str_one_6h
+b7_str_one_24h
+b7_str_plus_6h
+b7_str_plus_24h
+b8_is_return_1_1h
+b8_is_return_1_12h
+b8_is_share_1h
+b8_is_share_12h
+b8_rovn*log(r)_1h
+b8_rovn*log(r)_12h
+b8_str_1h
+b8_str_12h
+b8_str_one_1h
+b8_str_one_12h
+b8_str_plus_1h
+b8_str_plus_12h
+b9_is_return_1_1h
+b9_is_return_1_12h
+b9_is_share_1h
+b9_is_share_12h
+b9_rovn*log(r)_1h
+b9_rovn*log(r)_12h
+b9_str_1h
+b9_str_12h
+b9_str_one_1h
+b9_str_one_12h
+b9_str_plus_1h
+b9_str_plus_12h
+b10_is_return_1_6h
+b10_is_share_6h
+b10_rovn*log(r)_6h
+b10_str_6h
+b10_str_one_6h
+b10_str_plus_6h
+b11_is_return_1_1h
+b11_is_share_1h
+b11_rovn*log(r)_1h
+b11_str_1h
+b11_str_one_1h
+b11_str_plus_1h
+b12_is_return_1_14d
+b12_is_return_1_30d
+b12_is_return_1_60d
+b12_is_share_14d
+b12_is_share_30d
+b12_is_share_60d
+b12_rovn*log(r)_14d
+b12_rovn*log(r)_30d
+b12_rovn*log(r)_60d
+b12_str_14d
+b12_str_30d
+b12_str_60d
+b12_str_one_14d
+b12_str_one_30d
+b12_str_one_60d
+b12_str_plus_14d
+b12_str_plus_30d
+b12_str_plus_60d

+ 4 - 4
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys/v20250218/makedata_recsys_42_bucket_20250218.scala

@@ -24,11 +24,11 @@ object makedata_recsys_42_bucket_20250218 {
 
     // 1 读取参数
     val param = ParamUtils.parseArgs(args)
-    val readPath = param.getOrElse("readPath", "/dw/recommend/model/41_recsys_sample_data_v1/20240705*")
-    val savePath = param.getOrElse("savePath", "/dw/recommend/model/41_recsys_bucket/")
-    val fileName = param.getOrElse("fileName", "bucket_20250218_1237")
+    val readPath = param.getOrElse("readPath", "/dw/recommend/model/41_recsys_sample_data/20250221/*")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/42_recsys_bucket/")
+    val fileName = param.getOrElse("fileName", "bucket_20250218_322_1")
     val bucketNum = param.getOrElse("bucketNum", "200").toInt
-    val featureNameFile = param.getOrElse("featureNameFile", "feature_name_20250218.txt")
+    val featureNameFile = param.getOrElse("featureNameFile", "final_feature_1.txt")
     val noBucketFeatureName = param.getOrElse("noBucketFeatureName", "").split(",").filter(r => r.nonEmpty).toList
 
     val loader = getClass.getClassLoader