Explorar el Código

feat:添加ros的分桶文件

zhaohaipeng hace 1 mes
padre
commit
4343bd84d3

+ 0 - 3
src/main/resources/20250306_ros_bucket_232.txt → src/main/resources/20250306_ros_bucket_229.txt

@@ -121,8 +121,6 @@ head_title_merge2_sim	100	0.04747593030333519,0.058884069323539734,0.06781257688
 merge1_sim	100	0.05222834274172783,0.056516557931900024,0.06859127432107925,0.07270785421133041,0.0900970846414566,0.0938306376338005,0.09723561257123947,0.10409583151340485,0.10598330199718475,0.10601004213094711,0.11930127441883087,0.1273241490125656,0.135796457529068,0.14063434302806854,0.14204737544059753,0.1533740609884262,0.15557686984539032,0.16565686464309692,0.1680791825056076,0.17193926870822906,0.17811685800552368,0.17956869304180145,0.18584784865379333,0.18696726858615875,0.19217371940612793,0.19706059992313385,0.19816170632839203,0.2027623951435089,0.21293118596076965,0.21619635820388794,0.21659287810325623,0.22261807322502136,0.22636468708515167,0.2383238524198532,0.2537459433078766,0.25766706466674805,0.25878649950027466,0.25989091396331787,0.26457226276397705,0.2684202194213867,0.2769131660461426,0.2839967906475067,0.2894536554813385,0.2960822880268097,0.2981003224849701,0.30280688405036926,0.31615206599235535,0.32915207743644714,0.3387891948223114,0.3695670962333679,0.38546809554100037,0.39481109380722046,0.4243519902229309,0.4523613452911377,1.0
 merge2_sim	100	0.024302324280142784,0.043850552290678024,0.05296779051423073,0.06457696855068207,0.07686425745487213,0.08087354153394699,0.0873575210571289,0.0905999168753624,0.09901680797338486,0.10183098167181015,0.1049988865852356,0.10818876326084137,0.10999934375286102,0.11336259543895721,0.11634236574172974,0.1191471517086029,0.12175315618515015,0.124903604388237,0.12824474275112152,0.13139861822128296,0.13525646924972534,0.13548268377780914,0.13682138919830322,0.1386777013540268,0.14047813415527344,0.14153242111206055,0.1446356326341629,0.14553695917129517,0.14640040695667267,0.1489420384168625,0.1510317325592041,0.15190385282039642,0.1580697000026703,0.1614585518836975,0.16395914554595947,0.16554199159145355,0.16823482513427734,0.1706419587135315,0.1729206144809723,0.17552916705608368,0.1761520504951477,0.17826171219348907,0.18227477371692657,0.1823231279850006,0.18255364894866943,0.18535543978214264,0.1878947913646698,0.19189496338367462,0.19619008898735046,0.19781804084777832,0.20075823366641998,0.20512160658836365,0.20578724145889282,0.21121136844158173,0.2127571552991867,0.2222549468278885,0.22522509098052979,0.22715339064598083,0.23371613025665283,0.24138802289962769,0.24504241347312927,0.2534606158733368,0.2537463307380676,0.2538544535636902,0.26464712619781494,0.26519763469696045,0.2689698040485382,0.27715927362442017,0.28244394063949585,0.2845076620578766,0.2879163920879364,0.2939591109752655,0.2977142930030823,0.30706867575645447,0.3176025152206421,0.3292410373687744,0.3390907943248749,0.3595609664916992,0.3758666515350342,0.4068518280982971,0.45076829195022583,0.4792214334011078,0.5125172734260559,0.6281015872955322,0.6625102162361145,0.6741371154785156,1.0
 title_sim	100	0.13005706667900085,0.16492784023284912,0.18528838455677032,0.20047706365585327,0.2117595672607422,0.22096523642539978,0.22856654226779938,0.23568406701087952,0.24275624752044678,0.24760660529136658,0.2533603608608246,0.2586359679698944,0.2641458213329315,0.2699487507343292,0.27508172392845154,0.2797171175479889,0.2837998867034912,0.28853127360343933,0.2926584482192993,0.29726290702819824,0.30172717571258545,0.30663955211639404,0.311250776052475,0.31540191173553467,0.31951236724853516,0.32334277033805847,0.3273756206035614,0.3311781883239746,0.33437496423721313,0.337944358587265,0.34131184220314026,0.3440471887588501,0.3474341630935669,0.35048505663871765,0.35413414239883423,0.35744568705558777,0.3608223795890808,0.36346304416656494,0.36650586128234863,0.368836909532547,0.37192365527153015,0.3749753534793854,0.37741968035697937,0.3803941011428833,0.38365551829338074,0.3870891332626343,0.39018139243125916,0.39281341433525085,0.3959602117538452,0.39857593178749084,0.40160125494003296,0.4040621221065521,0.4069803059101105,0.4100947380065918,0.41245684027671814,0.4151410758495331,0.4183798134326935,0.4213269054889679,0.4240550398826599,0.42743465304374695,0.4304982125759125,0.43382367491722107,0.43667277693748474,0.4395895302295685,0.4429381191730499,0.44654086232185364,0.4498916566371918,0.45314931869506836,0.45606857538223267,0.4598219692707062,0.4628152847290039,0.4668230414390564,0.47030460834503174,0.47315356135368347,0.4774361252784729,0.481792151927948,0.48602554202079773,0.4904901385307312,0.494551420211792,0.4994584023952484,0.5037705898284912,0.5077197551727295,0.5126056671142578,0.5177767872810364,0.52275550365448,0.528768002986908,0.5329192876815796,0.5394445657730103,0.5464430451393127,0.5545313954353333,0.5636492371559143,0.5704280734062195,0.5786705613136292,0.5912541151046753,0.6026784777641296,0.622189462184906,0.6504296660423279,0.6825311183929443,1.0
-day_of_week	100	2.0,3.0
-hour	100	1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0
 c4_diff_rovn_168h	100	0.010204,0.016275,0.022222,0.027778,0.033537,0.04,0.045714,0.052632,0.058824,0.066667,0.072874,0.080645,0.088235,0.096154,0.104693,0.112403,0.123188,0.131579,0.142857,0.151515,0.162791,0.169312,0.181818,0.194444,0.202532,0.217391,0.230769,0.249736,0.25,0.270742,0.285714,0.30137,0.321177,0.333333,0.346154,0.365854,0.384615,0.4,0.424658,0.444444,0.466667,0.5,0.516129,0.545455,0.571429,0.6,0.625,0.657143,0.666667,0.708333,0.741935,0.769231,0.8,0.833333,0.875,0.918033,0.979167,1.0,1.03125,1.090909,1.142857,1.2,1.25,1.304348,1.352941,1.42,1.5,1.529412,1.6,1.666667,1.75,1.842105,1.958333,2.0,2.058824,2.1875,2.304348,2.419355,2.5,2.666667,2.833333,3.0,3.111111,3.333333,3.5,3.8,4.0,4.333333,4.666667,5.0,5.504167,6.0,7.0,8.0,9.4,14.0,270.5
 c4_diff_rovn_24h	100	0.008929,0.012987,0.016667,0.02,0.023364,0.027027,0.030303,0.033784,0.037037,0.040816,0.044776,0.048387,0.052632,0.056338,0.060606,0.065217,0.069767,0.074074,0.078947,0.083333,0.090909,0.09434,0.1,0.105263,0.111111,0.117647,0.125,0.130952,0.14,0.142857,0.153846,0.163636,0.166667,0.173913,0.181818,0.196078,0.2,0.210526,0.222222,0.235294,0.25,0.263158,0.277778,0.290323,0.307692,0.333333,0.361702,0.378378,0.4,0.416667,0.4375,0.461538,0.5,0.52381,0.555556,0.588235,0.615385,0.661017,0.666667,0.714286,0.75,0.787879,0.833333,0.875,0.944444,1.0,1.055556,1.142857,1.205882,1.285714,1.355932,1.473684,1.5,1.666667,1.75,1.909091,2.0,2.058824,2.266667,2.5,2.666667,3.0,3.5,4.0,4.307692,5.0,5.714286,6.833333,8.5,12.333333,225.5
 c4_diff_rovn_72h	100	0.009091,0.014056,0.018519,0.022936,0.027273,0.03177,0.036545,0.041322,0.046041,0.051282,0.056122,0.0625,0.066667,0.073171,0.079365,0.085106,0.090909,0.1,0.105263,0.111111,0.121212,0.127273,0.136842,0.142857,0.153846,0.166667,0.181818,0.192307,0.2,0.212121,0.222222,0.238095,0.25,0.257143,0.272727,0.285714,0.30303,0.321918,0.333333,0.338462,0.363636,0.380952,0.4,0.418605,0.441176,0.464286,0.5,0.536585,0.565217,0.596154,0.621622,0.655172,0.666667,0.708333,0.75,0.77551,0.811321,0.851064,0.896552,0.954545,1.0,1.071429,1.136364,1.2,1.25,1.333333,1.377358,1.466667,1.5,1.6,1.666667,1.777778,1.888889,2.0,2.178571,2.333333,2.5,2.611111,2.8,3.0,3.142857,3.4,3.666667,4.0,4.307692,4.75,5.166667,6.0,6.666667,7.833333,9.333333,14.0,231.5
@@ -201,7 +199,6 @@ c4_avg_ros_one_168h	100	0.030651,0.046512,0.061821,0.075099,0.088889,0.101191,0.
 c4_avg_ros_one_24h	100	0.006944,0.010638,0.013889,0.017284,0.020455,0.02381,0.026667,0.029851,0.033333,0.035714,0.039216,0.041667,0.045455,0.04955,0.052632,0.055556,0.059804,0.0625,0.066667,0.071429,0.074713,0.079365,0.083333,0.085714,0.090909,0.095654,0.1,0.104545,0.111111,0.113487,0.12,0.125,0.128086,0.133333,0.141652,0.145455,0.152381,0.16,0.166667,0.175,0.18315,0.190848,0.2,0.202749,0.213043,0.222222,0.230108,0.240705,0.25,0.259722,0.272109,0.283784,0.294118,0.305556,0.319805,0.333333,0.347222,0.363636,0.375,0.395833,0.409091,0.428571,0.444444,0.466667,0.494949,0.5,0.534799,0.5625,0.590514,0.619048,0.651235,0.666667,0.710145,0.75,0.785714,0.833333,0.875,0.9375,1.0,1.00641,1.1,1.181818,1.27,1.375,1.5,1.636364,1.8,2.0,2.25,2.6,3.0,3.703704,4.875,7.25,388.0
 c4_avg_ros_one_72h	100	0.014706,0.022727,0.030154,0.037037,0.043478,0.05,0.057143,0.06405,0.071429,0.078431,0.083333,0.091667,0.1,0.107143,0.1125,0.122276,0.126102,0.135472,0.142857,0.151515,0.161905,0.166667,0.173554,0.183333,0.193322,0.2,0.208916,0.220467,0.229167,0.240741,0.25,0.251131,0.264987,0.276316,0.285715,0.3,0.310186,0.323037,0.333333,0.334959,0.35,0.363636,0.375,0.389039,0.4,0.416667,0.428572,0.444444,0.459674,0.476191,0.497403,0.5,0.507863,0.527778,0.546875,0.565432,0.583333,0.601533,0.625,0.645106,0.666667,0.682716,0.708333,0.733333,0.750861,0.781944,0.809524,0.833334,0.869565,0.9,0.9375,0.980695,1.0,1.027778,1.074074,1.121693,1.166667,1.222222,1.275362,1.333333,1.4,1.481481,1.545455,1.635863,1.733333,1.833853,1.9825,2.083333,2.25,2.4375,2.666667,2.925926,3.236111,3.666667,4.222222,5.0,6.349817,9.125,388.0
 create_ts_diff	100	2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,14.0,16.0,20.0,23.0,26.0,36.0,43.0,46.0,47.0,49.0,58.0,67.0,69.0,74.0,75.0,77.0,79.0,86.0,89.0,96.0,99.0,108.0,112.0,117.0,144.0,147.0,153.0,167.0,192.0,195.0,216.0,236.0,253.0,272.0,396.0,470.0,2214.0
-is_greeting	100	1.0
 total_time	100	21.0,22.0,26.0,29.0,30.0,32.0,33.0,35.0,41.0,43.0,50.0,54.0,61.0,66.0,70.0,75.0,77.0,78.0,80.0,83.0,85.0,88.0,91.0,93.0,96.0,97.0,98.0,99.0,100.0,101.0,104.0,106.0,107.0,109.0,112.0,113.0,115.0,119.0,124.0,129.0,132.0,133.0,134.0,138.0,142.0,144.0,145.0,148.0,149.0,151.0,154.0,163.0,171.0,172.0,177.0,179.0,180.0,187.0,193.0,199.0,201.0,204.0,209.0,216.0,222.0,233.0,249.0,251.0,259.0,270.0,277.0,289.0,309.0,313.0,328.0,345.0,350.0,388.0,399.0,420.0,456.0,536.0,3240.0
 width	100	268.0,320.0,360.0,480.0,486.0,498.0,540.0,720.0,1080.0,1280.0,3840.0
 height	100	450.0,480.0,580.0,640.0,648.0,664.0,720.0,854.0,856.0,884.0,960.0,1080.0,1280.0,1288.0,1920.0,3840.0

+ 2 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys/v20250218/makedata_recsys_43_ros_multi_data_bucket_20250304.scala

@@ -18,7 +18,7 @@ object makedata_recsys_43_ros_multi_data_bucket_20250304 {
 
     // 1 读取参数
     val param = ParamUtils.parseArgs(args)
-    val readPath = param.getOrElse("readPath", "/dw/recommend/model/41_recsys_ros_train_data")
+    val readPath = param.getOrElse("readPath", "/dw/recommend/model/41_recsys_ros_train_data/")
     val savePath = param.getOrElse("savePath", "/dw/recommend/model/43_recsys_ros_data_bucket/")
     val beginStr = param.getOrElse("beginStr", "20250224")
     val endStr = param.getOrElse("endStr", "20250302")
@@ -27,7 +27,7 @@ object makedata_recsys_43_ros_multi_data_bucket_20250304 {
     val noBucketFeature = param.getOrElse("noBucketFeature", "hour,is_greeting,day_of_week").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "return_n_uv_noself")
     val whatApps = param.getOrElse("whatApps", "0,4,2,32,17,18,21,22,24,25,26,27,28,29,3,30,31,33,34,35,36").split(",").toSet
-    val fileName = param.getOrElse("fileName", "20250306_ros_bucket_232.txt")
+    val fileName = param.getOrElse("fileName", "20250306_ros_bucket_229.txt")
 
     val spark = SparkSession
       .builder()