Browse Source

Merge branch 'master' into feature_zhaohaipeng

zhaohaipeng 4 months ago
parent
commit
76521e6ef0

+ 47 - 0
src/main/resources/20241128_recsys_i2i_bucket_47.txt

@@ -0,0 +1,47 @@
+action:rov_day336	100	0.019854,0.024964,0.025876,0.027656,0.03079,0.033211,0.034639,0.036093,0.036864,0.037524,0.037918,0.038907,0.039264,0.039654,0.040559,0.041864,0.042842,0.044022,0.04473,0.045306,0.04625,0.046736,0.047414,0.047688,0.047849,0.048131,0.048751,0.0488,0.049114,0.049622,0.050601,0.050742,0.050954,0.051239,0.052302,0.052783,0.052804,0.053545,0.054264,0.054668,0.055453,0.056391,0.057024,0.057573,0.058317,0.058989,0.059534,0.061089,0.06275,0.063433,0.064061,0.065245,0.065346,0.067091,0.067592,0.068102,0.068852,0.069307,0.070297,0.071266,0.072067,0.072495,0.073921,0.07496,0.075024,0.078836,0.079371,0.080406,0.081501,0.090802,0.103937,0.206944,0.23445
+cate1:ros_day30	100	0.568913,0.775484,0.780939,0.802548,0.869337,0.893388,0.928003,0.928705,0.935544,0.986266,0.994482,1.022163,1.054331,1.085911,1.176906,1.193622,1.227002,1.294849,1.402791
+action:ros_day21	100	0.25,0.328602,0.365854,0.387535,0.404136,0.452126,0.465116,0.479065,0.496933,0.516129,0.52012,0.53876,0.543434,0.556543,0.589769,0.6,0.604136,0.609922,0.617376,0.631866,0.651121,0.673048,0.688716,0.707252,0.720015,0.728878,0.746781,0.757868,0.767714,0.778018,0.793709,0.814338,0.81546,0.820637,0.835823,0.848272,0.860068,0.875434,0.893281,0.899405,0.908121,0.912633,0.916364,0.922581,0.92456,0.937168,0.956125,0.965547,0.966331,0.994555,1.004505,1.010526,1.017194,1.018069,1.023735,1.0359,1.054629,1.076291,1.096155,1.114377,1.120894,1.131766,1.169232,1.184152,1.187872,1.20162,1.209869,1.210319,1.223057,1.278414,1.302882,1.31542,1.359835,1.383904,1.418945,1.451771,1.493695,1.542568,1.622861,1.66065,1.793735,22.0
+action:str_day336	100	0.027696,0.030167,0.031642,0.033286,0.0335,0.03405,0.034097,0.034306,0.034368,0.034781,0.03511,0.03518,0.035215,0.035287,0.035835,0.036406,0.037134,0.037399,0.03829,0.038314,0.038991,0.039151,0.039866,0.040019,0.040565,0.041311,0.041606,0.041796,0.042902,0.042939,0.043499,0.043906,0.044136,0.044519,0.045218,0.045581,0.04618,0.046208,0.046557,0.047247,0.047858,0.048742,0.04893,0.049489,0.049953,0.050708,0.051685,0.052182,0.052284,0.05309,0.053489,0.054153,0.055099,0.056472,0.056837,0.056984,0.058334,0.059659,0.060481,0.061499,0.06314,0.064178,0.066172,0.0669,0.069459,0.070702,0.072543,0.076603,0.08157,0.084894,0.088682,0.093633,0.102991,1.278912
+action:rov_day7	100	0.011834,0.01391,0.015558,0.017769,0.019506,0.020764,0.020934,0.022329,0.024305,0.025658,0.026879,0.027964,0.02951,0.030255,0.030529,0.031355,0.03175,0.032465,0.033259,0.033396,0.033947,0.034491,0.034926,0.035653,0.036802,0.037338,0.037833,0.039025,0.039665,0.040042,0.04057,0.04097,0.041458,0.041772,0.042002,0.04288,0.04373,0.044321,0.044634,0.045391,0.046466,0.046521,0.046577,0.047264,0.047882,0.048202,0.048778,0.049052,0.049129,0.049817,0.049925,0.050304,0.050754,0.051228,0.051571,0.051769,0.051897,0.052922,0.053455,0.054602,0.055906,0.056395,0.056713,0.056761,0.057419,0.057487,0.058291,0.058677,0.058815,0.059958,0.062278,0.062582,0.063155,0.064239,0.066725,0.067458,0.06892,0.073396,0.074284,0.080075,0.082772,0.083973,9.666667
+cate2:rov_day1	100	0.026118,0.02717,0.0278,0.032997,0.033808,0.033995,0.037557,0.037572,0.040033,0.041213,0.04411,0.045117,0.047059,0.048886,0.049236,0.052352,0.053487,0.056874,0.059551,0.060377,0.063341,0.095622
+cate2:rov_day30	100	0.033632,0.038707,0.041573,0.044125,0.045851,0.046259,0.046815,0.047222,0.047335,0.047616,0.048904,0.04908,0.050429,0.051145,0.052511,0.054189,0.055182,0.057711,0.063536
+cate1:vovd1_day30	100	0.419087,0.421103,0.457025,0.480801,0.489344,0.498186,0.512282,0.514182,0.51446,0.517509,0.518859,0.525887,0.542596,0.571369,0.571914,0.58392,0.618863,0.662473,0.67297
+cate2:str_day1	100	0.035124,0.038644,0.039721,0.040222,0.040396,0.040966,0.041068,0.043825,0.045943,0.046239,0.046699,0.050678,0.052013,0.052262,0.059016,0.068581,0.071479,0.071686,0.080863,0.08236
+cate2:rov_day3	100	0.026846,0.027896,0.029803,0.032872,0.033258,0.034565,0.03626,0.03945,0.041853,0.044162,0.045321,0.046555,0.0479,0.050589,0.052187,0.053566,0.057363,0.059005,0.078679
+cate1:rov_day1	100	0.032978,0.041151,0.041269,0.04214,0.04282,0.043561,0.04411,0.046826,0.048145,0.048886,0.050366,0.051036,0.052371,0.054471,0.057223,0.0585,0.063412,0.068202,0.069998
+cate2:ros_day3	100	0.314491,0.376721,0.521202,0.62513,0.715105,0.723817,0.74503,0.779485,0.791123,0.821386,0.823861,0.832823,0.905328,0.9225,1.043391,1.107022,1.151774,1.159685,1.180504,1.236465,1.275264,1.469523
+cate2:str_day7	100	0.038319,0.039771,0.042056,0.042651,0.043288,0.043932,0.04456,0.044671,0.044821,0.045327,0.046423,0.046512,0.047975,0.054029,0.05444,0.055424,0.063758,0.068376,0.07106,0.081865,0.08189,0.094847
+cate1:rov_day3	100	0.033398,0.040212,0.042089,0.042606,0.042607,0.044162,0.045844,0.046173,0.047263,0.04779,0.0479,0.048133,0.04828,0.050624,0.051353,0.051611,0.054355,0.064011,0.064333,0.068423
+action:rov_day1	100	0.006645,0.010671,0.0131,0.015215,0.016408,0.017405,0.018827,0.019993,0.020532,0.0222,0.022965,0.023864,0.024839,0.02634,0.027364,0.028571,0.029044,0.030108,0.030729,0.03236,0.033761,0.03442,0.035693,0.037016,0.038361,0.039324,0.039844,0.040173,0.040317,0.040831,0.041873,0.042067,0.0426,0.042731,0.043056,0.043605,0.044204,0.044778,0.045463,0.046139,0.046308,0.04694,0.048285,0.048383,0.048873,0.048901,0.04912,0.049384,0.049504,0.049659,0.050326,0.050395,0.050727,0.050889,0.051361,0.051669,0.052166,0.052325,0.052673,0.052999,0.053938,0.054452,0.055513,0.055937,0.056182,0.056993,0.057215,0.057573,0.059021,0.059079,0.059399,0.059889,0.060132,0.060645,0.060833,0.063576,0.065285,0.068285,0.07037,0.072983,0.073639,0.075258,0.084614,0.091848,0.100404,7.0
+action:ros_day336	100	0.348613,0.434862,0.453572,0.513781,0.534658,0.556676,0.576444,0.603072,0.654221,0.693712,0.715447,0.731941,0.752071,0.761062,0.772958,0.797642,0.807481,0.836035,0.853369,0.88675,0.904697,0.931785,0.947704,0.965427,0.981116,0.996465,1.011519,1.027801,1.051123,1.055017,1.059061,1.060222,1.065558,1.076977,1.080115,1.10106,1.101198,1.113232,1.133651,1.142515,1.15843,1.19254,1.219361,1.245118,1.269212,1.26929,1.281102,1.316645,1.369174,1.418259,1.424134,1.457729,1.481606,1.508852,1.532746,1.552832,1.55388,1.561422,1.636015,1.703974,1.751432,1.776608,1.803191,1.80948,1.820939,1.822301,1.823567,1.855319,1.862946,1.899702,1.981808,2.041562,2.313638,2.334647,3.364988,5.065342
+action:vovd1_day7	100	0.137666,0.205238,0.239319,0.257397,0.278364,0.297826,0.308853,0.323599,0.330846,0.339474,0.345131,0.355611,0.365971,0.378241,0.386534,0.394485,0.401745,0.4053,0.4096,0.418047,0.423613,0.43029,0.437099,0.440698,0.44838,0.452893,0.457572,0.460936,0.461213,0.469853,0.472214,0.478025,0.485588,0.491587,0.492244,0.497435,0.498857,0.50138,0.502244,0.510647,0.51313,0.51375,0.518796,0.522852,0.52421,0.526356,0.531474,0.534258,0.54725,0.553808,0.555702,0.561948,0.57056,0.573226,0.574231,0.580617,0.592342,0.606824,0.61233,0.616977,0.619873,0.624001,0.629072,0.630574,0.638138,0.641281,0.6527,0.653959,0.659965,0.661916,0.665125,0.665898,0.676628,0.691067,0.69406,0.697332,0.697739,0.708397,0.720571,0.739653,0.765283,0.788233,0.820842,0.974022,66.0
+cate1:str_day30	100	0.043294,0.043718,0.044014,0.045551,0.045938,0.046901,0.04757,0.048139,0.050299,0.051885,0.052461,0.052698,0.053114,0.054098,0.060293,0.062666,0.063203,0.072972,0.083519
+cate1:rov_day30	100	0.041354,0.041515,0.045137,0.046642,0.047085,0.047451,0.047478,0.048904,0.049013,0.04908,0.049602,0.050241,0.050293,0.053031,0.053143,0.053609,0.054005,0.059483,0.060733
+cate2:ros_day7	100	0.390503,0.39766,0.578565,0.716623,0.752324,0.754474,0.776025,0.838096,0.868822,0.871489,0.878833,0.885504,0.888563,0.925512,1.012344,1.081793,1.102885,1.11219,1.127939,1.163207,1.238196,1.394579
+cate2:str_day30	100	0.036034,0.040884,0.042769,0.044628,0.04554,0.045578,0.046651,0.046863,0.047346,0.049194,0.051813,0.052461,0.052698,0.056434,0.061652,0.061676,0.065602,0.072413,0.076976
+cate1:vovd1_day1	100	0.381544,0.441132,0.445324,0.467233,0.469086,0.469838,0.478436,0.48103,0.519413,0.522035,0.523529,0.52888,0.53239,0.537782,0.565363,0.574103,0.575852,0.589783,0.706623,0.743416,0.912354
+cate1:vovd1_day3	100	0.373584,0.417485,0.445449,0.452683,0.460663,0.47318,0.473297,0.473597,0.475075,0.488597,0.492541,0.509136,0.510421,0.519506,0.555438,0.593366,0.653946,0.685791,0.798356
+cate1:ros_day3	100	0.540985,0.651424,0.687296,0.70954,0.760934,0.821386,0.838121,0.872193,0.900231,0.905328,0.967716,1.021737,1.063502,1.086678,1.097226,1.167411,1.181074,1.342978,1.369609,1.400263
+cate1:str_day7	100	0.040618,0.040838,0.041237,0.041273,0.042872,0.046365,0.046746,0.046876,0.047723,0.047737,0.048656,0.051212,0.053431,0.05444,0.055187,0.055424,0.06004,0.061695,0.062462,0.067108,0.067844,0.068583,0.085334
+cate2:rov_day7	100	0.030134,0.031978,0.033098,0.036565,0.037636,0.038902,0.04181,0.044573,0.045799,0.046316,0.046451,0.047662,0.048373,0.049034,0.049267,0.05026,0.051588,0.051632,0.056584,0.06822
+action:ros_day7	100	0.235294,0.285472,0.3,0.336364,0.346555,0.365527,0.370192,0.38741,0.401695,0.406203,0.435685,0.451705,0.472144,0.483173,0.506849,0.533366,0.548633,0.565574,0.584775,0.601219,0.628705,0.655896,0.670468,0.688501,0.696132,0.706748,0.722412,0.740503,0.753801,0.781575,0.791011,0.813642,0.815923,0.817254,0.82943,0.836958,0.847162,0.853241,0.856176,0.864239,0.886174,0.889372,0.894357,0.904244,0.922517,0.9375,0.950874,0.969653,0.991328,1.011533,1.020426,1.022207,1.041193,1.063075,1.069047,1.071736,1.076515,1.084513,1.085469,1.102302,1.110046,1.146137,1.165161,1.186323,1.189572,1.210379,1.217369,1.218697,1.230732,1.251783,1.289474,1.302491,1.318563,1.337901,1.339483,1.399507,1.431193,1.446613,1.448433,1.533333,1.602524,1.68916,1.705158,2.134192,56.0
+action:str_day21	100	0.02529,0.025918,0.026903,0.027762,0.028975,0.029358,0.029759,0.031034,0.031404,0.031486,0.031523,0.03199,0.033401,0.033404,0.033816,0.034923,0.035043,0.035242,0.035405,0.036101,0.036718,0.036796,0.036905,0.036934,0.037151,0.037436,0.037648,0.037743,0.038547,0.039119,0.03998,0.040804,0.041602,0.041932,0.042623,0.042741,0.042858,0.043654,0.043674,0.043831,0.044052,0.044966,0.045036,0.045468,0.04597,0.046178,0.047072,0.047778,0.047981,0.048224,0.048268,0.048517,0.048907,0.049482,0.050441,0.051567,0.052579,0.053673,0.055797,0.056378,0.057306,0.057499,0.059821,0.061331,0.061444,0.06305,0.063712,0.064352,0.067355,0.071703,0.076664,0.078362,0.080203,0.082609,0.083713,0.089455,0.104439,12.2
+action:str_day1	100	0.017324,0.021725,0.02407,0.025399,0.026721,0.027972,0.028949,0.030162,0.031023,0.032131,0.032741,0.033061,0.034113,0.034893,0.035262,0.035872,0.035971,0.036071,0.036502,0.037155,0.037493,0.037617,0.037677,0.038439,0.039401,0.040038,0.040611,0.040757,0.041109,0.041609,0.042446,0.042696,0.043116,0.043287,0.043652,0.043956,0.044368,0.044531,0.045105,0.045271,0.045521,0.045849,0.045905,0.046234,0.046542,0.046948,0.046957,0.047394,0.047995,0.048265,0.049201,0.049995,0.050314,0.051602,0.052819,0.053446,0.054108,0.05495,0.055071,0.055349,0.056438,0.056748,0.057858,0.059144,0.060907,0.063513,0.063704,0.065497,0.065569,0.066413,0.067175,0.067752,0.069989,0.07229,0.074536,0.076372,0.078269,0.079786,0.081651,0.087351,0.092959,0.09716,0.098375,0.111731,0.114471,4.0
+cate2:vovd1_day7	100	0.356425,0.362591,0.373599,0.40264,0.420118,0.426541,0.447309,0.452845,0.459098,0.463879,0.487661,0.491812,0.493725,0.499914,0.507697,0.5156,0.523791,0.534578,0.547378,0.561814,0.574644
+cate2:vovd1_day30	100	0.370944,0.443582,0.463276,0.482661,0.483314,0.48982,0.491756,0.49963,0.513591,0.514496,0.517509,0.518859,0.529267,0.537233,0.542512,0.551456,0.564547,0.62894,0.685266
+action:vovd1_day336	100	0.248048,0.296243,0.309127,0.337845,0.343714,0.372605,0.390476,0.400762,0.410739,0.429253,0.443089,0.451999,0.456237,0.463742,0.46671,0.473092,0.481418,0.482771,0.491281,0.497179,0.503485,0.509267,0.511139,0.511264,0.51902,0.522388,0.52348,0.529949,0.534409,0.537132,0.541402,0.543096,0.544218,0.546881,0.556071,0.55956,0.566337,0.570893,0.575426,0.577667,0.582255,0.585322,0.586476,0.592316,0.596397,0.597903,0.60241,0.610119,0.615851,0.624853,0.636624,0.649087,0.659438,0.662265,0.67485,0.679742,0.688169,0.693111,0.694488,0.699781,0.702592,0.708096,0.727242,0.749829,0.755498,0.758281,0.785222,0.786199,0.790469,0.791362,0.794589,0.827586,0.924981,1.108323,1.803599,2.663537
+cate1:str_day1	100	0.035608,0.03656,0.041586,0.042091,0.043074,0.046,0.046257,0.046782,0.047716,0.047823,0.048981,0.04941,0.052013,0.052262,0.053882,0.060194,0.064039,0.066976,0.067135,0.081139
+action:vovd1_day21	100	0.156087,0.199765,0.226021,0.251534,0.267941,0.280154,0.285975,0.291124,0.296886,0.313966,0.32,0.322216,0.327358,0.330584,0.334689,0.340277,0.343639,0.347172,0.350845,0.354745,0.362177,0.367886,0.371314,0.371816,0.38003,0.383879,0.392,0.396224,0.401546,0.408392,0.413505,0.417704,0.419192,0.419686,0.430666,0.43259,0.438847,0.443279,0.445256,0.447127,0.452189,0.461607,0.472475,0.473825,0.479948,0.487464,0.494902,0.500678,0.50193,0.506677,0.507536,0.511972,0.521198,0.529263,0.532299,0.54172,0.543133,0.544893,0.548595,0.551223,0.56902,0.579565,0.586334,0.5894,0.603461,0.611138,0.611801,0.620005,0.636173,0.663272,0.698996,0.713037,0.750082,0.761945,0.821366,0.844093,0.851573,29.0
+cate2:ros_day30	100	0.436914,0.518208,0.761409,0.784233,0.812472,0.829719,0.894708,0.896962,0.913583,0.928003,0.935544,0.951323,0.969855,1.029472,1.036543,1.106437,1.114378,1.120517,1.287282,1.310473,1.479188
+cate2:vovd1_day1	100	0.281409,0.303588,0.313957,0.366302,0.386282,0.392412,0.432701,0.436011,0.467233,0.48802,0.491061,0.506301,0.507795,0.528009,0.52888,0.538166,0.557429,0.580056,0.618626,0.639548,0.661279
+action:ros_day1	100	0.192308,0.273587,0.283582,0.310345,0.335807,0.357913,0.374771,0.4,0.425856,0.429936,0.45159,0.466262,0.48,0.494245,0.506438,0.521403,0.550201,0.571721,0.586611,0.60972,0.620032,0.643861,0.652411,0.660232,0.687753,0.716007,0.728519,0.74258,0.753153,0.764706,0.790535,0.796106,0.80349,0.836454,0.846076,0.860111,0.881054,0.884846,0.895225,0.905669,0.920299,0.923548,0.938824,0.9625,0.981852,0.992625,0.998067,1.016669,1.018298,1.022431,1.029801,1.052233,1.068822,1.071901,1.088889,1.108568,1.127615,1.156554,1.170245,1.22768,1.267618,1.26937,1.293097,1.311524,1.318015,1.329235,1.333881,1.366553,1.403406,1.438059,1.478431,1.49064,1.498601,1.505104,1.52908,1.566296,1.603306,1.617925,1.694509,1.769295,1.831797,1.965463,2.047187,17.0
+action:str_day7	100	0.02371,0.027264,0.029139,0.030503,0.031529,0.032865,0.03318,0.03385,0.034541,0.035331,0.035675,0.036936,0.037649,0.038091,0.038308,0.038784,0.039253,0.039561,0.04015,0.040387,0.040991,0.041353,0.041436,0.041776,0.042165,0.042581,0.042944,0.043717,0.043982,0.044402,0.045644,0.046125,0.046733,0.046875,0.047026,0.047115,0.047583,0.047815,0.048015,0.048511,0.048933,0.049401,0.04988,0.050733,0.051045,0.051934,0.052151,0.052965,0.053091,0.053144,0.053185,0.053187,0.054155,0.054389,0.05503,0.055528,0.056086,0.05609,0.056597,0.057627,0.058523,0.060062,0.060753,0.06266,0.063993,0.064689,0.066117,0.067578,0.068979,0.069121,0.07051,0.072706,0.076075,0.076781,0.08086,0.085863,0.086148,0.089955,0.091713,0.095259,0.106007,0.124617,0.127122,3.5
+cate1:ros_day1	100	0.519355,0.66865,0.69915,0.811369,0.84014,0.84403,0.870042,0.897146,0.939885,1.00774,1.143832,1.176866,1.183983,1.196572,1.211117,1.22336,1.395947,1.474438,1.496271
+cate1:str_day3	100	0.038626,0.040474,0.043725,0.043868,0.044441,0.045714,0.046753,0.046972,0.047257,0.049522,0.050513,0.052909,0.053765,0.055499,0.058611,0.060402,0.065406,0.066702,0.067354,0.074332,0.084941
+cate1:ros_day7	100	0.635802,0.653971,0.65945,0.786459,0.838096,0.855184,0.868264,0.870284,0.888563,0.922251,0.9723,0.998596,1.082772,1.098859,1.111132,1.140248,1.222733,1.26638,1.323897,1.333723
+cate1:vovd1_day7	100	0.429809,0.433815,0.448448,0.452044,0.456199,0.461723,0.468483,0.473863,0.482026,0.491812,0.505874,0.513677,0.5156,0.521871,0.585771,0.590815,0.623261,0.623742,0.654694,0.69024
+action:rov_day21	100	0.012194,0.015795,0.018156,0.019479,0.020871,0.022283,0.02243,0.022818,0.023734,0.023792,0.024912,0.02574,0.026363,0.026752,0.027949,0.027969,0.029016,0.029888,0.030381,0.030485,0.030778,0.030791,0.031497,0.031525,0.031665,0.032079,0.032328,0.033126,0.033836,0.034304,0.034613,0.034621,0.035005,0.035409,0.036235,0.036529,0.037285,0.037697,0.037845,0.03808,0.039235,0.039677,0.04048,0.040792,0.040898,0.040997,0.041597,0.042572,0.043476,0.043678,0.044648,0.045501,0.046636,0.046883,0.04704,0.047114,0.048054,0.048444,0.048891,0.049643,0.050005,0.050938,0.051163,0.052014,0.052591,0.053466,0.054724,0.055605,0.057282,0.058256,0.058721,0.060107,0.062902,0.063506,0.065871,0.068452,0.074924,0.075672,0.080895,0.083062,3.0
+cate1:rov_day7	100	0.036666,0.042247,0.042418,0.042667,0.043535,0.044368,0.045227,0.046451,0.047062,0.047219,0.04723,0.04767,0.048373,0.049901,0.050948,0.05276,0.054466,0.057854,0.060435,0.06132,0.061887
+cate2:vovd1_day3	100	0.310271,0.323133,0.340636,0.36469,0.364953,0.387506,0.393317,0.39418,0.435147,0.452893,0.453187,0.467103,0.473297,0.479693,0.503452,0.510421,0.516627,0.519987,0.540275,0.569572,0.58257,0.607636
+cate2:ros_day1	100	0.337544,0.398472,0.525643,0.636424,0.737146,0.746133,0.756802,0.823211,0.841549,0.84403,0.850929,0.871358,0.939885,1.03452,1.18473,1.206837,1.217894,1.290502,1.358827,1.369865,1.694856
+cate2:str_day3	100	0.03577,0.039348,0.039679,0.040957,0.04204,0.042171,0.043124,0.043777,0.044224,0.044981,0.045699,0.046297,0.047884,0.052909,0.053765,0.055292,0.066415,0.0721,0.082512,0.088704

+ 47 - 0
src/main/resources/20241128_recsys_i2i_bucket_47_v2.txt

@@ -0,0 +1,47 @@
+cate1:vovd1_day30	100	0.341772,0.419087,0.457025,0.462716,0.480801,0.484549,0.489344,0.498186,0.512282,0.514182,0.51446,0.517509,0.524029,0.525887,0.52939,0.542596,0.571369,0.571914,0.58392,0.608948,0.618863,0.662473,0.67297
+cate2:str_day1	100	0.033038,0.036905,0.037686,0.038644,0.03879,0.039721,0.040396,0.040567,0.040966,0.041068,0.041139,0.043351,0.04483,0.045837,0.045943,0.046239,0.046699,0.050678,0.052013,0.059016,0.065793,0.071479,0.071686,0.081906,0.08236
+cate1:rov_day3	100	0.021761,0.026643,0.027054,0.033398,0.037379,0.038136,0.040212,0.042089,0.042231,0.042606,0.042607,0.044162,0.045844,0.046173,0.047263,0.048133,0.04828,0.050624,0.051353,0.051611,0.054355,0.064011,0.064333,0.068423
+cate1:ros_day3	100	0.344306,0.540985,0.594047,0.651424,0.687296,0.70954,0.760934,0.821386,0.838121,0.845957,0.900231,0.967716,0.985816,1.021737,1.063502,1.086678,1.097226,1.167411,1.181074,1.251183,1.342978,1.369609,1.400263
+cate1:str_day7	100	0.036657,0.040618,0.040838,0.041237,0.041273,0.042872,0.044776,0.046365,0.046746,0.046876,0.047723,0.047737,0.048173,0.048656,0.051212,0.055187,0.055325,0.055424,0.06004,0.061695,0.066902,0.067108,0.068583,0.085334
+action:ros_day7	100	0.214286,0.271307,0.293688,0.329089,0.336364,0.361758,0.365527,0.370192,0.378351,0.394963,0.401695,0.40982,0.440529,0.451816,0.466164,0.481571,0.499661,0.516484,0.534188,0.546428,0.553358,0.58131,0.58836,0.608108,0.629786,0.652079,0.670468,0.688501,0.692796,0.704522,0.709165,0.734669,0.741954,0.777778,0.786458,0.791011,0.800711,0.815923,0.821204,0.82943,0.839474,0.853241,0.859198,0.864239,0.886174,0.889372,0.896594,0.908172,0.932421,0.950874,0.969653,0.980604,0.996579,1.020426,1.022207,1.045031,1.063075,1.071736,1.072464,1.07656,1.084513,1.100654,1.110046,1.160494,1.165161,1.189572,1.217369,1.218697,1.230732,1.239416,1.251783,1.283452,1.302491,1.318563,1.337901,1.339483,1.386641,1.430006,1.446613,1.448433,1.522022,1.602524,1.68916,1.705158,2.134192,59.0
+action:str_day21	100	0.024285,0.025745,0.026863,0.026903,0.027851,0.029229,0.029536,0.030846,0.031294,0.031483,0.031523,0.031747,0.033248,0.033646,0.03383,0.034923,0.035043,0.035242,0.036045,0.036101,0.036623,0.036796,0.036923,0.037151,0.03738,0.037697,0.037743,0.038069,0.038966,0.039901,0.03998,0.040526,0.041428,0.041721,0.042493,0.042623,0.042741,0.043354,0.043674,0.04377,0.043831,0.044293,0.045036,0.045399,0.045628,0.046099,0.047072,0.047416,0.047848,0.048224,0.048268,0.048333,0.048517,0.048867,0.049192,0.049938,0.051403,0.05203,0.05325,0.054972,0.056361,0.056378,0.056789,0.057306,0.057499,0.059114,0.060704,0.061444,0.062933,0.063712,0.064262,0.066167,0.069497,0.074911,0.076664,0.077679,0.078362,0.081807,0.083713,0.093197,0.102429,23.166667
+action:rov_day21	100	0.012014,0.013199,0.017201,0.018156,0.02007,0.020871,0.022283,0.02243,0.023355,0.023734,0.024132,0.024912,0.026058,0.026431,0.026752,0.027771,0.02815,0.029438,0.029888,0.030485,0.030778,0.030791,0.031283,0.031665,0.032005,0.032328,0.03278,0.033664,0.034019,0.034304,0.034613,0.034621,0.034869,0.035185,0.036168,0.036514,0.037243,0.037409,0.037697,0.03808,0.038206,0.039419,0.040198,0.040642,0.040898,0.040997,0.041227,0.042281,0.043136,0.043476,0.043678,0.044648,0.045062,0.045501,0.046636,0.046755,0.04704,0.047555,0.048375,0.048504,0.049395,0.049643,0.050484,0.051163,0.052014,0.052591,0.052933,0.054704,0.055605,0.057282,0.058256,0.058721,0.060326,0.062902,0.063506,0.066719,0.073334,0.075672,0.076082,0.080895,0.083062,5.0
+action:vovd1_day336	100	0.248048,0.269801,0.296243,0.316646,0.337845,0.343714,0.359085,0.386732,0.390476,0.405387,0.415979,0.43131,0.444287,0.452781,0.456237,0.464449,0.46796,0.477302,0.482771,0.488934,0.495389,0.50004,0.509267,0.511139,0.513168,0.521635,0.522388,0.52348,0.524909,0.528353,0.532809,0.534409,0.536096,0.541402,0.543096,0.544218,0.549014,0.557489,0.561005,0.56691,0.57146,0.575501,0.579341,0.582255,0.586476,0.592978,0.597525,0.597903,0.603812,0.610836,0.615851,0.622983,0.629389,0.639957,0.65397,0.659438,0.663116,0.67485,0.68259,0.688169,0.693111,0.694488,0.699781,0.702592,0.707336,0.710641,0.733625,0.750316,0.755498,0.758281,0.77156,0.786199,0.790469,0.791362,0.809808,0.827586,0.84378,0.924981,1.108323,1.803599,3.268374
+cate1:str_day1	100	0.035608,0.03656,0.037703,0.041586,0.042091,0.043074,0.046,0.046257,0.046782,0.047716,0.047823,0.048384,0.048981,0.04941,0.052262,0.053882,0.05913,0.060194,0.064039,0.066976,0.074171,0.081139
+action:rov_day7	100	0.01034,0.01391,0.015094,0.01703,0.019049,0.019896,0.020764,0.021611,0.023042,0.024871,0.025981,0.027062,0.028324,0.029674,0.030081,0.030522,0.031056,0.031481,0.032027,0.032543,0.033259,0.033448,0.034077,0.034491,0.035017,0.035842,0.036802,0.037338,0.03786,0.039083,0.039618,0.039713,0.040451,0.04097,0.041458,0.041793,0.04232,0.043376,0.044168,0.044634,0.045382,0.046262,0.046521,0.046571,0.047264,0.047882,0.048069,0.048778,0.049052,0.049129,0.049817,0.050058,0.050329,0.050754,0.050921,0.051414,0.051769,0.051897,0.052922,0.053216,0.053455,0.053699,0.055632,0.055906,0.056628,0.056761,0.057194,0.057487,0.058291,0.058677,0.058729,0.059125,0.059958,0.062278,0.062582,0.063155,0.064239,0.066725,0.067458,0.06892,0.070718,0.073396,0.075292,0.080075,0.082271,0.083973,10.538462
+cate1:rov_day7	100	0.024745,0.032212,0.036666,0.042028,0.042247,0.042418,0.042667,0.043535,0.044368,0.044822,0.045227,0.046451,0.047062,0.047219,0.04723,0.04767,0.050948,0.05276,0.054466,0.060435,0.06132,0.061887
+cate1:rov_day1	100	0.02241,0.027995,0.032978,0.039364,0.041151,0.041269,0.04214,0.04282,0.043408,0.043561,0.04411,0.046826,0.048145,0.049265,0.050366,0.051036,0.052371,0.054471,0.057223,0.0585,0.063412,0.068202,0.069998
+cate2:ros_day1	100	0.337544,0.361674,0.507606,0.525643,0.603445,0.69085,0.737146,0.746133,0.823211,0.841549,0.850929,0.871358,0.921015,0.939885,0.947934,1.03452,1.135761,1.18473,1.206837,1.217894,1.275245,1.287086,1.290502,1.358827,1.369865,1.436415,1.694856
+cate2:str_day3	100	0.034438,0.039306,0.039348,0.040144,0.040957,0.04204,0.042171,0.043322,0.043777,0.044224,0.044777,0.044981,0.045699,0.046082,0.047884,0.049615,0.052909,0.055292,0.065738,0.066415,0.0721,0.080923,0.082512,0.087943,0.088704
+cate2:vovd1_day7	100	0.34348,0.362372,0.362591,0.373599,0.40264,0.412673,0.426541,0.44663,0.452845,0.459098,0.463879,0.482624,0.490028,0.493596,0.493725,0.499914,0.502578,0.509512,0.513889,0.5156,0.523791,0.530674,0.540608,0.547378,0.561814,0.574644
+cate2:vovd1_day30	100	0.370054,0.370944,0.394936,0.443582,0.453913,0.473622,0.481066,0.482661,0.486071,0.48982,0.491756,0.49963,0.506523,0.513591,0.514496,0.518633,0.518859,0.529267,0.532879,0.537233,0.542512,0.551456,0.554816,0.564547,0.584296,0.603374,0.62894,0.681393,0.685266
+cate2:rov_day30	100	0.032679,0.033632,0.037525,0.038707,0.038893,0.043666,0.044125,0.045367,0.045855,0.046259,0.046721,0.046815,0.046942,0.047222,0.047335,0.04908,0.050429,0.051339,0.052511,0.052594,0.054189,0.055182,0.055581,0.058821,0.05887,0.063536
+action:ros_day21	100	0.25,0.326225,0.348096,0.387535,0.388889,0.418738,0.452126,0.461137,0.465116,0.479065,0.488722,0.506973,0.516129,0.53554,0.543434,0.545428,0.567845,0.597696,0.603483,0.604136,0.609922,0.616497,0.631402,0.650833,0.672269,0.686258,0.698988,0.708627,0.720015,0.730707,0.749786,0.766101,0.778018,0.786408,0.814338,0.81546,0.820637,0.840838,0.857774,0.860068,0.875434,0.890433,0.899405,0.908121,0.913318,0.922581,0.925607,0.939113,0.956125,0.965045,0.966331,0.98619,1.004505,1.015347,1.017194,1.018069,1.023735,1.054629,1.062168,1.096155,1.107936,1.114462,1.131766,1.170002,1.184152,1.187872,1.20162,1.209869,1.210319,1.217177,1.250679,1.278414,1.302882,1.31542,1.366255,1.383904,1.418945,1.451771,1.474234,1.542568,1.641522,1.644686,1.793735,49.0
+action:str_day336	100	0.02738,0.029479,0.031359,0.032522,0.033286,0.0335,0.03405,0.034097,0.034306,0.034368,0.034589,0.034939,0.03516,0.03518,0.035215,0.035986,0.036327,0.037134,0.037757,0.03829,0.038424,0.03905,0.039151,0.039677,0.039866,0.040019,0.040387,0.040565,0.040858,0.041606,0.041796,0.042187,0.042902,0.043471,0.043834,0.043906,0.04419,0.044519,0.044874,0.045218,0.045535,0.045581,0.046128,0.046208,0.046557,0.047247,0.04793,0.048744,0.04893,0.049516,0.050248,0.051281,0.052182,0.052203,0.052537,0.053345,0.054038,0.054707,0.055946,0.056677,0.056984,0.057633,0.058985,0.059659,0.060183,0.061499,0.061644,0.06314,0.064913,0.066172,0.066266,0.069339,0.070702,0.071408,0.074793,0.079662,0.083598,0.087618,0.088682,0.094049,0.102991,1.278912
+cate1:vovd1_day1	100	0.243819,0.33983,0.381544,0.414537,0.441132,0.44991,0.467233,0.469086,0.469838,0.478436,0.48103,0.519413,0.522035,0.523529,0.526829,0.53239,0.565363,0.574103,0.575852,0.706623,0.743416,0.912354
+cate1:vovd1_day3	100	0.247172,0.315605,0.373584,0.411903,0.417485,0.445449,0.452683,0.460663,0.468618,0.47318,0.473297,0.475075,0.488597,0.492541,0.509136,0.519506,0.524279,0.555438,0.589867,0.593366,0.653946,0.685791,0.798356
+cate1:vovd1_day7	100	0.313507,0.402376,0.429809,0.433815,0.448448,0.452044,0.456199,0.461723,0.468483,0.469553,0.473863,0.482026,0.486893,0.491812,0.505874,0.513677,0.521871,0.547924,0.585771,0.590815,0.623261,0.654694,0.69024
+cate1:ros_day30	100	0.441491,0.568913,0.775484,0.780939,0.798579,0.802548,0.869337,0.893388,0.928003,0.928705,0.952479,0.986266,0.994482,1.022163,1.022553,1.054331,1.085911,1.143696,1.176906,1.193622,1.227002,1.294849,1.402791
+action:str_day1	100	0.017182,0.021658,0.024056,0.024947,0.026515,0.027719,0.028949,0.030184,0.031023,0.032131,0.032741,0.032919,0.033789,0.034893,0.03506,0.035717,0.035872,0.035971,0.036068,0.036502,0.037155,0.037493,0.037617,0.037677,0.038439,0.039101,0.03987,0.040611,0.040762,0.041337,0.041624,0.042696,0.043047,0.043157,0.043652,0.043788,0.044368,0.044853,0.045105,0.045521,0.045849,0.046234,0.046542,0.046564,0.046948,0.046957,0.047253,0.047995,0.048265,0.049213,0.049896,0.05029,0.051602,0.052819,0.05361,0.054495,0.05495,0.055071,0.055349,0.056748,0.056807,0.057858,0.059144,0.060907,0.062318,0.063704,0.06395,0.065497,0.066351,0.067175,0.067752,0.069989,0.070459,0.073915,0.074536,0.077093,0.078269,0.078535,0.081148,0.082419,0.087351,0.091977,0.092959,0.09716,0.098375,0.102524,0.111731,0.118182,8.0
+cate2:ros_day30	100	0.436914,0.711028,0.761409,0.767417,0.784233,0.812472,0.829719,0.894708,0.896962,0.913583,0.935544,0.951323,0.969855,1.025526,1.035148,1.043186,1.047615,1.106437,1.114378,1.120517,1.267013,1.291652,1.310473,1.429622,1.479188
+cate2:vovd1_day1	100	0.281409,0.303588,0.314226,0.363677,0.384621,0.386282,0.392412,0.422817,0.432701,0.434989,0.444593,0.472166,0.48974,0.491061,0.506301,0.507795,0.518586,0.522074,0.528385,0.52888,0.538166,0.557429,0.580056,0.604966,0.618626,0.639548,0.661279
+action:ros_day1	100	0.183673,0.267087,0.283582,0.29952,0.32293,0.335807,0.357913,0.366408,0.394834,0.41841,0.425856,0.434759,0.45159,0.470827,0.480248,0.494245,0.501451,0.516682,0.530424,0.558152,0.574468,0.586611,0.60972,0.618894,0.626596,0.643861,0.652411,0.656521,0.686747,0.708333,0.728519,0.746479,0.756289,0.773006,0.790535,0.80349,0.816901,0.836454,0.847134,0.860111,0.880247,0.884846,0.893939,0.900835,0.905669,0.921922,0.93188,0.955056,0.975232,0.988048,0.994495,1.005038,1.016669,1.021788,1.022431,1.041484,1.068822,1.071901,1.086919,1.108568,1.124112,1.156554,1.170245,1.22768,1.267618,1.26937,1.27728,1.311524,1.318015,1.329235,1.333881,1.366553,1.393993,1.432842,1.451709,1.481995,1.498601,1.505104,1.52908,1.571429,1.603306,1.616897,1.686355,1.694509,1.769295,1.813338,1.965463,2.047187,56.0
+action:str_day7	100	0.023224,0.026685,0.02873,0.030093,0.03061,0.031909,0.03291,0.033514,0.03385,0.034937,0.035331,0.036085,0.037222,0.037941,0.038227,0.038552,0.038784,0.039479,0.039561,0.040347,0.040505,0.041353,0.041375,0.041727,0.041964,0.042165,0.042767,0.043553,0.043982,0.044193,0.045483,0.046125,0.046255,0.046875,0.046898,0.047115,0.047583,0.047815,0.048015,0.048161,0.048594,0.04916,0.049646,0.050733,0.051031,0.052017,0.052151,0.053091,0.053144,0.053185,0.053969,0.054389,0.054709,0.055528,0.055643,0.056086,0.05609,0.056172,0.056597,0.058523,0.059413,0.060062,0.061936,0.0638,0.064117,0.064689,0.066675,0.067578,0.068979,0.069428,0.070625,0.072706,0.074228,0.07642,0.079166,0.083325,0.085863,0.086148,0.089955,0.09036,0.092134,0.094142,0.100913,0.106385,0.126716,0.142473,12.333333
+cate1:str_day30	100	0.038686,0.043294,0.043635,0.044014,0.044142,0.045435,0.045551,0.045938,0.046901,0.04757,0.048139,0.050299,0.050991,0.051885,0.052637,0.052698,0.053114,0.054098,0.060293,0.062666,0.063203,0.070568,0.072972,7.0
+cate1:rov_day30	100	0.031155,0.03561,0.041354,0.041434,0.041515,0.045137,0.045911,0.046177,0.046642,0.047085,0.047451,0.047478,0.048904,0.049013,0.049338,0.050136,0.050241,0.050293,0.053031,0.053143,0.053609,0.054005,0.055094,0.059483,0.060733
+cate2:vovd1_day3	100	0.310271,0.326185,0.340636,0.36469,0.364953,0.372978,0.393317,0.39418,0.432888,0.435147,0.449545,0.453187,0.467103,0.479693,0.499769,0.503452,0.510421,0.516627,0.519987,0.540275,0.545786,0.569572,0.58257,0.607636
+action:vovd1_day21	100	0.143826,0.199765,0.211395,0.22186,0.251534,0.26505,0.280154,0.285975,0.295704,0.305278,0.313966,0.32,0.322162,0.322216,0.327358,0.330584,0.337537,0.343099,0.343639,0.350845,0.354745,0.358808,0.365962,0.371193,0.371314,0.377571,0.382067,0.388004,0.39565,0.399993,0.403695,0.408392,0.414161,0.417704,0.419192,0.425099,0.430666,0.43259,0.438455,0.443279,0.447044,0.447127,0.451774,0.461519,0.461607,0.472118,0.473333,0.479428,0.483648,0.494486,0.499278,0.500678,0.50193,0.506677,0.507536,0.509696,0.519308,0.527555,0.529263,0.52988,0.539124,0.543133,0.544893,0.547973,0.551012,0.566678,0.573958,0.586334,0.5894,0.598029,0.610941,0.611801,0.620005,0.640163,0.667947,0.698996,0.704467,0.745519,0.761945,0.821366,0.844093,0.851573,158.733333
+cate2:rov_day7	100	0.029853,0.031978,0.033098,0.033441,0.036888,0.038013,0.038902,0.04181,0.043898,0.044573,0.045256,0.045799,0.045926,0.046316,0.046768,0.047087,0.048373,0.049034,0.049267,0.05026,0.051588,0.051632,0.056584,0.05666,0.06822
+cate2:rov_day3	100	0.026846,0.027896,0.029803,0.031285,0.032872,0.033258,0.034565,0.034616,0.035888,0.037362,0.03945,0.041853,0.045321,0.046555,0.047878,0.0479,0.050589,0.051679,0.052187,0.053566,0.057363,0.059005,0.059661,0.078679
+cate1:ros_day7	100	0.36987,0.635802,0.653971,0.65945,0.761121,0.766711,0.786459,0.838096,0.855184,0.856368,0.868264,0.922251,0.980311,0.998596,1.098859,1.111132,1.140248,1.222733,1.228546,1.26638,1.323897,1.333723
+cate2:ros_day3	100	0.314491,0.338998,0.461697,0.521202,0.61426,0.649795,0.715105,0.723817,0.73155,0.779485,0.791123,0.823861,0.868049,0.905328,0.985858,1.043391,1.05927,1.107022,1.151774,1.159685,1.168975,1.175908,1.180504,1.236465,1.275264,1.332397,1.469523
+cate2:str_day7	100	0.038052,0.038319,0.039771,0.040604,0.042651,0.043059,0.043354,0.043932,0.04456,0.044671,0.044821,0.044994,0.045327,0.046172,0.046423,0.046512,0.047975,0.048554,0.05444,0.063758,0.068376,0.07106,0.075212,0.08189,0.094847
+action:rov_day336	100	0.01523,0.023268,0.024964,0.02659,0.02806,0.030774,0.033211,0.034198,0.03579,0.036683,0.037524,0.038079,0.038949,0.039264,0.039876,0.041031,0.04194,0.04313,0.044339,0.04473,0.04524,0.0458,0.046336,0.047385,0.047414,0.047742,0.047849,0.04819,0.048751,0.0488,0.049233,0.050037,0.050601,0.050731,0.050742,0.050752,0.05093,0.051239,0.051846,0.05241,0.052783,0.05318,0.053545,0.054086,0.054668,0.054811,0.05564,0.056391,0.057146,0.057535,0.058026,0.058805,0.059002,0.060311,0.061089,0.061712,0.06275,0.063193,0.063433,0.064061,0.064186,0.065245,0.066319,0.067592,0.068102,0.06914,0.069307,0.070297,0.071266,0.072067,0.072495,0.074195,0.07496,0.075024,0.075367,0.078648,0.079371,0.081295,0.083006,0.090802,0.103937,0.206944,0.573067
+action:ros_day336	100	0.328571,0.381047,0.444745,0.502823,0.534658,0.538354,0.556676,0.576444,0.601064,0.653501,0.678965,0.693712,0.715483,0.731941,0.739905,0.757833,0.763784,0.784082,0.803892,0.821911,0.847305,0.876507,0.898466,0.907165,0.933088,0.960244,0.977362,0.981116,0.996465,1.011519,1.012077,1.031927,1.055017,1.057087,1.060222,1.065558,1.076977,1.08792,1.101198,1.103376,1.113232,1.127524,1.142515,1.146072,1.16793,1.18965,1.211578,1.245118,1.26929,1.278694,1.281102,1.317699,1.354536,1.41129,1.419912,1.424589,1.453609,1.481606,1.514957,1.532746,1.55388,1.574778,1.639601,1.703974,1.751432,1.776608,1.803191,1.820939,1.822301,1.823567,1.833974,1.862946,1.899702,1.988003,2.083395,2.313638,2.334647,3.364988,10.0
+action:vovd1_day7	100	0.126628,0.196744,0.233821,0.244323,0.261744,0.279516,0.297896,0.305715,0.31759,0.329781,0.339474,0.340811,0.352666,0.36415,0.374162,0.38207,0.389253,0.398857,0.402973,0.4053,0.413171,0.420005,0.424581,0.432343,0.438837,0.445243,0.448888,0.45374,0.459041,0.461213,0.46797,0.471349,0.475773,0.480474,0.485907,0.491877,0.495604,0.498382,0.50138,0.502244,0.510647,0.51313,0.515434,0.519381,0.523047,0.52421,0.527484,0.531474,0.540303,0.550432,0.553808,0.555702,0.560303,0.567848,0.57056,0.573226,0.574231,0.580617,0.585804,0.598126,0.611751,0.61233,0.619873,0.622942,0.629072,0.630574,0.638138,0.6527,0.653959,0.659965,0.662826,0.665898,0.676628,0.691067,0.69406,0.697332,0.697739,0.708397,0.720571,0.739653,0.774368,0.788233,0.974022,111.875
+cate2:rov_day1	100	0.025067,0.026166,0.0278,0.030741,0.033808,0.033995,0.037557,0.037572,0.038997,0.040033,0.041047,0.044538,0.045117,0.047059,0.048886,0.049236,0.04928,0.052352,0.053487,0.056874,0.059551,0.060377,0.063341,0.065841,0.095622
+action:rov_day1	100	0.006645,0.009819,0.012993,0.014619,0.016387,0.016408,0.017857,0.019324,0.019993,0.020916,0.022646,0.023219,0.024355,0.025016,0.02634,0.027321,0.028564,0.028811,0.029694,0.030567,0.031885,0.032992,0.033882,0.034343,0.035427,0.036889,0.038287,0.039122,0.039708,0.040132,0.040317,0.040995,0.041873,0.042173,0.0426,0.042731,0.043056,0.043605,0.044079,0.044699,0.044941,0.045541,0.046139,0.046593,0.04694,0.048383,0.048699,0.048901,0.048975,0.049173,0.049504,0.049659,0.050326,0.050395,0.050469,0.050889,0.051176,0.051433,0.052146,0.052166,0.052568,0.052709,0.053008,0.053938,0.054907,0.055937,0.056182,0.056887,0.057124,0.057573,0.059021,0.059079,0.059399,0.059889,0.060132,0.060653,0.063472,0.063576,0.065285,0.068285,0.07037,0.072983,0.073639,0.075258,0.084614,0.091848,0.100404,7.0
+cate1:ros_day1	100	0.379002,0.519355,0.586693,0.66865,0.69915,0.811369,0.84014,0.84403,0.870042,0.874672,0.897146,0.954623,1.00774,1.103063,1.143832,1.176866,1.183983,1.196572,1.211117,1.22336,1.364289,1.395947,1.474438,1.496271
+cate1:str_day3	100	0.036074,0.038626,0.039479,0.040242,0.040474,0.043725,0.043868,0.044441,0.045543,0.045714,0.046753,0.046838,0.046972,0.047257,0.050513,0.053765,0.055499,0.058611,0.060402,0.063202,0.065406,0.066702,0.074332,0.084941
+cate2:ros_day7	100	0.390503,0.429215,0.578565,0.658154,0.716623,0.752324,0.754474,0.776025,0.804576,0.844612,0.871489,0.885504,0.888563,0.913008,0.992856,1.012344,1.049471,1.081793,1.102885,1.11219,1.121338,1.127939,1.163207,1.164573,1.259271,1.394579
+cate2:str_day30	100	0.036034,0.037306,0.040884,0.042769,0.044354,0.044598,0.044628,0.04554,0.045578,0.045732,0.045999,0.046651,0.046863,0.04728,0.049194,0.050416,0.051813,0.052461,0.056434,0.061652,0.061676,0.063805,0.070873,0.076313,0.076976

+ 94 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_dssm/makedata_i2i_01_originData_20241127.scala

@@ -0,0 +1,94 @@
+package com.aliyun.odps.spark.examples.makedata_dssm
+
+import com.alibaba.fastjson.{JSON, JSONObject}
+import com.aliyun.odps.TableSchema
+import com.aliyun.odps.data.Record
+import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils, env}
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.sql.SparkSession
+import scala.util.Random
+import scala.collection.JavaConversions._
+import scala.collection.mutable.ArrayBuffer
+/**
+ * Stage 01 of the DSSM i2i pipeline: builds raw training pairs.
+ *
+ * For every hour in [beginStr, endStr] it reads the `alg_dssm_sample` partition, emits the
+ * observed (vid_left, vid_right) pair as a positive line (label "1") and up to `negCnt`
+ * randomly drawn vids from `t_vid_tag_feature` as negative lines (label "0"), then writes the
+ * tab-separated lines, gzip-compressed, to `savePath/<dt><hh>`.
+ *
+ * Output line format: logKey \t label \t vid_left \t vid_right_or_negative_vid
+ */
+object makedata_i2i_01_originData_20241127 {
+  /** Identity transfer function handed to `odpsOps.readTable`; returns each record unchanged. */
+  def func(record: Record, schema: TableSchema): Record = {
+    record
+  }
+
+  /**
+   * Draws up to `negCnt` distinct negative vids by sampling random indices.
+   *
+   * Sampling indices costs O(negCnt) per row, whereas shuffling the whole candidate list costs
+   * O(|vids|) per row. Candidates equal to `vidLeft` / `vidRight` (and null entries) are rejected
+   * before counting, so a row normally receives exactly `negCnt` negatives.
+   *
+   * @param vids     candidate vid pool (may contain nulls or duplicates)
+   * @param negCnt   number of negatives wanted
+   * @param vidLeft  left vid of the positive pair, never returned
+   * @param vidRight right vid of the positive pair, never returned
+   * @return distinct negatives in draw order; fewer than `negCnt` only when the pool is too small
+   */
+  private def sampleNegatives(vids: Array[String], negCnt: Int, vidLeft: String, vidRight: String): Seq[String] = {
+    val picked = scala.collection.mutable.LinkedHashSet[String]()
+    if (vids.nonEmpty && negCnt > 0) {
+      // Bounded retries keep the loop finite when the pool has fewer usable vids than negCnt.
+      val maxAttempts = negCnt * 10
+      var attempts = 0
+      while (picked.size < negCnt && attempts < maxAttempts) {
+        val candidate = vids(Random.nextInt(vids.length))
+        if (candidate != null && candidate != vidLeft && candidate != vidRight) {
+          picked += candidate
+        }
+        attempts += 1
+      }
+    }
+    picked.toSeq
+  }
+
+  /**
+   * Job entry point.
+   *
+   * Recognised arguments (all optional): tablePart, beginStr, endStr, savePath, project,
+   * repartition, filterHours (comma-separated hours to skip), negCnt.
+   */
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // 1 Read parameters
+    val param = ParamUtils.parseArgs(args)
+    val tablePart = param.getOrElse("tablePart", "64").toInt
+    val beginStr = param.getOrElse("beginStr", "2024062008")
+    val endStr = param.getOrElse("endStr", "2024062023")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/51_dssm_i2i_sample/")
+    val project = param.getOrElse("project", "loghubods")
+    val repartition = param.getOrElse("repartition", "100").toInt
+    val filterHours = param.getOrElse("filterHours", "25").split(",").toSet
+    val negCnt = param.getOrElse("negCnt", "20").toInt
+    // 2 Obtain the ODPS reader
+    val odpsOps = env.getODPS(sc)
+    // 3 Produce data hour by hour
+    val timeRange = MyDateUtils.getDateHourRange(beginStr, endStr)
+    for (dt_hh <- timeRange) {
+      val dt = dt_hh.substring(0, 8)
+      val hh = dt_hh.substring(8, 10)
+      val partition = s"dt=$dt,hh=$hh"
+      if (filterHours.nonEmpty && filterHours.contains(hh)) {
+        println("不执行partiton:" + partition)
+      } else {
+        println("开始执行partiton:" + partition)
+        // The candidate pool is only needed for hours that are actually processed, so it is read
+        // here rather than before the filter check. An Array gives O(1) random access.
+        val vidsArr = odpsOps.readTable(project = project,
+            table = "t_vid_tag_feature",
+            partition = s"dt=$dt",
+            transfer = func,
+            numPartition = tablePart)
+          .map(r => {
+            r.getString("vid")
+          }).collect()
+        val vids_br = sc.broadcast(vidsArr)
+        val odpsData = odpsOps.readTable(project = project,
+          table = "alg_dssm_sample",
+          partition = partition,
+          transfer = func,
+          numPartition = tablePart)
+          .map(record =>{
+            val apptype = record.getString("apptype")
+            val pagesource = record.getString("pagesource")
+            val mid = record.getString("mid")
+            val vid_right = record.getString("vid_right")
+            val vid_left = record.getString("vid_left")
+            val total_return_uv = record.getString("total_return_uv")
+            val view_24h = record.getString("view_24h")
+            val logKey = (apptype, pagesource, mid, vid_right, vid_left, total_return_uv, view_24h).productIterator.mkString(",")
+            (logKey, vid_left, vid_right)
+          }).flatMap {
+            // flatMap streams the expanded rows instead of buffering a whole partition in memory.
+            case (logKey, vid_left, vid_right) =>
+              val negLines = sampleNegatives(vids_br.value, negCnt, vid_left, vid_right).map(negVid =>
+                (logKey, "0", vid_left, negVid).productIterator.mkString("\t"))
+              // Negatives first, positive last: same per-row ordering as before.
+              negLines :+ (logKey, "1", vid_left, vid_right).productIterator.mkString("\t")
+          }
+        val savePartition = dt + hh
+        val hdfsPath = savePath + "/" + savePartition
+        // Only paths under the model root may be deleted and rewritten.
+        if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+          println("删除路径并开始数据写入:" + hdfsPath)
+          MyHdfsUtils.delete_hdfs_path(hdfsPath)
+          odpsData.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+        } else {
+          println("路径不合法,无法写入:" + hdfsPath)
+        }
+        // Release the executor copies of this hour's candidate pool.
+        vids_br.unpersist()
+      }
+    }
+  }
+}

+ 158 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_dssm/makedata_i2i_02_joinFeatureData_20241128.scala

@@ -0,0 +1,158 @@
+package com.aliyun.odps.spark.examples.makedata_dssm
+
+import com.alibaba.fastjson.JSON
+import com.aliyun.odps.TableSchema
+import com.aliyun.odps.data.Record
+import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils, env}
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.sql.SparkSession
+
+import scala.collection.JavaConversions._
+import scala.collection.mutable.ArrayBuffer
+import scala.util.Random
+
+/**
+ * Stage 02 of the DSSM i2i pipeline: joins features onto the stage-01 sample lines.
+ *
+ * For every hour in [beginStr, endStr] it reads `readPath/<dt><hh>`, attaches the static
+ * (`t_vid_tag_feature`) and action (`t_vid_stat_feature`) feature JSON of both the left and the
+ * right vid via left outer joins, looks up level-1 / level-2 category statistics from broadcast
+ * maps, and writes 12 tab-separated columns, gzip-compressed, to `savePath/<dt><hh>`:
+ *
+ * logKey, label, vid_left, vid_right, feature_left, feature_right, feature_left_action,
+ * feature_right_action, feature_left_cate1, feature_right_cate1, feature_left_cate2,
+ * feature_right_cate2
+ *
+ * A missing feature is written as the empty JSON object "{}".
+ */
+object makedata_i2i_02_joinFeatureData_20241128 {
+  /** Identity transfer function handed to `odpsOps.readTable`; returns each record unchanged. */
+  def func(record: Record, schema: TableSchema): Record = {
+    record
+  }
+
+  /**
+   * Normalises the right-hand side of a left outer join: a missing row and a null column
+   * value both become "{}" so later JSON parsing never sees null.
+   */
+  private def orEmptyJson(feature: Option[String]): String = {
+    feature.flatMap(f => Option(f)).getOrElse("{}")
+  }
+
+  /**
+   * Extracts (category1, category2_1) from a static-feature JSON string, parsing it once.
+   * A null/empty JSON string, an absent key or a null value each yield the placeholder "无".
+   */
+  private def extractCategories(featureJson: String): (String, String) = {
+    val json = Option(featureJson).filter(_.nonEmpty).flatMap(s => Option(JSON.parseObject(s)))
+    def pick(key: String): String = {
+      json.flatMap(j => Option(j.get(key))).map(_.toString).getOrElse("无")
+    }
+    (pick("category1"), pick("category2_1"))
+  }
+
+  /**
+   * Job entry point.
+   *
+   * Recognised arguments (all optional): tablePart, beginStr, endStr, readPath, savePath,
+   * project, repartition, filterHours (comma-separated hours to skip), ifDebug.
+   */
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // 1 Read parameters
+    val param = ParamUtils.parseArgs(args)
+    val tablePart = param.getOrElse("tablePart", "64").toInt
+    val beginStr = param.getOrElse("beginStr", "2024062008")
+    val endStr = param.getOrElse("endStr", "2024062023")
+    val readPath = param.getOrElse("readPath", "/dw/recommend/model/51_dssm_i2i_sample/")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/52_dssm_i2i_joinfeature/")
+    val project = param.getOrElse("project", "loghubods")
+    val repartition = param.getOrElse("repartition", "100").toInt
+    val filterHours = param.getOrElse("filterHours", "25").split(",").toSet
+    val ifDebug = param.getOrElse("ifDebug", "false").toBoolean
+    // 2 Obtain the ODPS reader
+    val odpsOps = env.getODPS(sc)
+    // 3 Produce data hour by hour
+    val timeRange = MyDateUtils.getDateHourRange(beginStr, endStr)
+    for (dt_hh <- timeRange) {
+      val dt = dt_hh.substring(0, 8)
+      val hh = dt_hh.substring(8, 10)
+      val partition = s"dt=$dt,hh=$hh"
+      if (filterHours.nonEmpty && filterHours.contains(hh)) {
+        println("不执行partiton:" + partition)
+      } else {
+        println("开始执行partiton:" + partition)
+        // 1 Category features are broadcast. collectAsMap runs a job eagerly, so the tables are
+        //   only read for hours that are actually processed.
+        val category1_br = sc.broadcast(
+          odpsOps.readTable(project = project,
+            table = "t_vid_l1_cat_stat_feature",
+            partition = s"dt=$dt",
+            transfer = func,
+            numPartition = tablePart)
+            .map(record =>{
+              val category = record.getString("category1")
+              val feature = record.getString("feature")
+              (category, feature)
+          }).collectAsMap()
+        )
+        val category2_br = sc.broadcast(
+          odpsOps.readTable(project = project,
+              table = "t_vid_l2_cat_stat_feature",
+              partition = s"dt=$dt",
+              transfer = func,
+              numPartition = tablePart)
+            .map(record => {
+              val category = record.getString("category2")
+              val feature = record.getString("feature")
+              (category, feature)
+            }).collectAsMap()
+        )
+        // 2 Per-video features are attached with joins
+        val vidStaticFeature = odpsOps.readTable(project = project,
+          table = "t_vid_tag_feature",
+          partition = s"dt=$dt",
+          transfer = func,
+          numPartition = tablePart)
+        .map(record => {
+          val vid = record.getString("vid")
+          val feature = record.getString("feature")
+          (vid, feature)
+        })
+        val vidActionFeature = odpsOps.readTable(project = project,
+            table = "t_vid_stat_feature",
+            partition = s"dt=$dt",
+            transfer = func,
+            numPartition = tablePart)
+          .map(record => {
+            val vid = record.getString("vid")
+            val feature = record.getString("feature")
+            (vid, feature)
+          })
+
+        val savePartition = dt + hh
+        // Each join re-keys the row by the vid needed for the NEXT join:
+        // left static -> right static -> left action -> right action.
+        val sampleData1 = sc.textFile( readPath + "/" + savePartition).map(r=>{
+          val rList = r.split("\t")
+          val logKey = rList(0)
+          val label = rList(1)
+          val vid_left = rList(2)
+          val vid_right = rList(3)
+          (vid_left, (logKey, label, vid_right))
+        }).leftOuterJoin(vidStaticFeature).map{
+          case (vid_left, ((logKey, label, vid_right), feature)) =>
+            (vid_right, (logKey, label, vid_left, orEmptyJson(feature)))
+        }.leftOuterJoin(vidStaticFeature).map{
+          case (vid_right, ((logKey, label, vid_left, feature_left), feature)) =>
+            (vid_left, (logKey, label, vid_right, feature_left, orEmptyJson(feature)))
+        }.leftOuterJoin(vidActionFeature).map{
+          case (vid_left, ((logKey, label, vid_right, feature_left, feature_right), feature)) =>
+            (vid_right, (logKey, label, vid_left, feature_left, feature_right, orEmptyJson(feature)))
+        }.leftOuterJoin(vidActionFeature).map{
+          case (vid_right, ((logKey, label, vid_left, feature_left, feature_right, feature_left_action), feature)) =>
+            (logKey, label, vid_left, vid_right, feature_left, feature_right, feature_left_action, orEmptyJson(feature))
+        }.mapPartitions(rows =>{
+          // Resolve the broadcast maps once per partition and stream rows lazily.
+          val category1 = category1_br.value
+          val category2 = category2_br.value
+          rows.map{
+            case (logKey, label, vid_left, vid_right, feature_left, feature_right, feature_left_action, feature_right_action) =>
+              val (cate1_left, cate2_left) = extractCategories(feature_left)
+              val (cate1_right, cate2_right) = extractCategories(feature_right)
+              val feature_left_cate1 = category1.getOrElse(cate1_left, "{}")
+              val feature_left_cate2 = category2.getOrElse(cate2_left, "{}")
+              val feature_right_cate1 = category1.getOrElse(cate1_right, "{}")
+              val feature_right_cate2 = category2.getOrElse(cate2_right, "{}")
+              (logKey, label, vid_left, vid_right, feature_left, feature_right, feature_left_action, feature_right_action,
+                feature_left_cate1, feature_right_cate1, feature_left_cate2, feature_right_cate2).productIterator.mkString("\t")
+          }
+        })
+        if (ifDebug){
+          println("数据量:" + sampleData1.count())
+        }
+        val hdfsPath = savePath + "/" + savePartition
+        // Only paths under the model root may be deleted and rewritten.
+        if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+          println("删除路径并开始数据写入:" + hdfsPath)
+          MyHdfsUtils.delete_hdfs_path(hdfsPath)
+          sampleData1.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+        } else {
+          println("路径不合法,无法写入:" + hdfsPath)
+        }
+      }
+    }
+  }
+}

+ 119 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_dssm/makedata_i2i_03_onehotFile_20241128.scala

@@ -0,0 +1,119 @@
+package com.aliyun.odps.spark.examples.makedata_dssm
+
+import com.alibaba.fastjson.JSON
+import com.aliyun.odps.TableSchema
+import com.aliyun.odps.data.Record
+import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils, env}
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.sql.SparkSession
+import scala.collection.mutable
+import scala.collection.JavaConversions._
+import scala.collection.mutable.ArrayBuffer
+
+/**
+ * Stage 03 of the DSSM i2i pipeline: maintains the one-hot dictionary for sparse vid features.
+ *
+ * The dictionary maps "prefix:value" (e.g. "cate1:xxx", "vid:123") to an integer id that is
+ * unique within its prefix. An existing dictionary at `readPath` is loaded if present; every
+ * feature value found in `t_vid_tag_feature` (partition `dt`) that is not yet in the dictionary
+ * receives the next free id for its prefix. The merged dictionary is written as
+ * "key \t id" lines, gzip-compressed, to `savePath`.
+ */
+object makedata_i2i_03_onehotFile_20241128 {
+  /** Identity transfer function handed to `odpsOps.readTable`; returns each record unchanged. */
+  def func(record: Record, schema: TableSchema): Record = {
+    record
+  }
+
+  /**
+   * Job entry point.
+   *
+   * Recognised arguments (all optional): tablePart, dt, readPath, savePath, project,
+   * repartition, ifDebug.
+   */
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // 1 Read parameters
+    val param = ParamUtils.parseArgs(args)
+    val tablePart = param.getOrElse("tablePart", "64").toInt
+    val dt = param.getOrElse("dt", "20240620")
+    val readPath = param.getOrElse("readPath", "/dw/recommend/model/53_dssm_i2i_onehot/20240101")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/53_dssm_i2i_onehot/20250101")
+    val project = param.getOrElse("project", "loghubods")
+    val repartition = param.getOrElse("repartition", "100").toInt
+    val ifDebug = param.getOrElse("ifDebug", "false").toBoolean
+    // 2 Obtain the ODPS reader
+    val odpsOps = env.getODPS(sc)
+    // 3 Load the previous dictionary (if any). The RDD is cached so the statistics, the count
+    //   and the final collect share one read of the file.
+    val onehotMap = if (MyHdfsUtils.hdfs_exits(readPath)){
+      val onehot = sc.textFile(readPath).map(r => {
+        val rList = r.split("\t")
+        (rList(0), rList(1))
+      }).cache()
+      val onehotMapOldStatic = onehot.map(_._1.split(":")(0)).map(r => (r, 1)).reduceByKey(_ + _).collect().sortBy(-_._2)
+      // println (not print) so the following statistics start on their own lines.
+      println(s"读入路径:$readPath \n数据量:" + onehot.count())
+      println("打印各个特征多少枚举值:")
+      onehotMapOldStatic.foreach(r => println(r.productIterator.mkString("\t")))
+      val loaded = mutable.Map(onehot.collectAsMap().toSeq: _*)
+      onehot.unpersist()
+      loaded
+    }else{
+      mutable.Map[String, String]()
+    }
+
+    // JSON key -> dictionary prefix. The three category2_* slots share the "cate2" prefix.
+    val featureKeyToPrefix = Map(
+      "category1" -> "cate1",
+      "category2_1" -> "cate2",
+      "category2_2" -> "cate2",
+      "category2_3" -> "cate2",
+      "valid_time" -> "valid_time",
+      "timeliness" -> "timeliness",
+      "sentiment_tendency" -> "sentiment_tendency",
+      "has_end_credit_guide" -> "has_end_credit_guide",
+      "background_music_type" -> "background_music_type",
+      "cover_persons_num" -> "cover_persons_num",
+      "captions" -> "captions",
+      "captions_color" -> "captions_color",
+      "audience_value_type" -> "audience_value_type",
+      "audience_gender" -> "audience_gender",
+      "audience_age_group" -> "audience_age_group",
+      "font_size" -> "font_size",
+      "video_type" -> "video_type",
+      "video_style" -> "video_style"
+    )
+
+    // 4 Collect every distinct "prefix:value" present in today's static features.
+    //   The result is sorted so that new ids are assigned in a reproducible order; the order of
+    //   distinct().collect() alone is not guaranteed to be stable between runs.
+    val vidStaticFeature = odpsOps.readTable(project = project,
+        table = "t_vid_tag_feature",
+        partition = s"dt=$dt",
+        transfer = func,
+        numPartition = tablePart)
+      .map(record => {
+        val vid = record.getString("vid")
+        val feature = record.getString("feature")
+        (vid, feature)
+      }).flatMap{
+        case (vid, feature) =>
+          val result = new ArrayBuffer[String]()
+          result += "vid:" + vid
+          // A null/empty feature column still contributes its vid entry.
+          val json = if (feature == null || feature.isEmpty) null else JSON.parseObject(feature)
+          if (json != null) {
+            featureKeyToPrefix.foreach { case (key, prefix) =>
+              if (json.containsKey(key)) {
+                val raw = json.get(key)
+                // A key that is present with a null value is encoded as the placeholder.
+                val value = if (raw == null) "无" else raw.toString
+                result += prefix + ":" + value
+              }
+            }
+          }
+          result.distinct
+      }.distinct().collect().sorted
+
+    // 5 Highest id already used per prefix.
+    val maxEnumMap = mutable.Map[String, Int]()
+    onehotMap.foreach { case (key, value) =>
+      val prefix = key.split(":")(0)
+      val currentMax = maxEnumMap.getOrElse(prefix, 0)
+      maxEnumMap(prefix) = Math.max(currentMax, value.toInt)
+    }
+    // 6 Unseen features get max+1 within their prefix; existing ids are never changed.
+    vidStaticFeature.foreach { feature =>
+      val prefix = feature.split(":")(0)
+      if (!onehotMap.contains(feature)) {
+        val newEnumValue = maxEnumMap.getOrElse(prefix, 0) + 1
+        maxEnumMap(prefix) = newEnumValue
+        onehotMap(feature) = newEnumValue.toString
+      }
+    }
+
+    // 7 Persist the merged dictionary.
+    val dataRdd = sc.parallelize(onehotMap.toSeq.map(_.productIterator.mkString("\t")))
+    val hdfsPath = savePath
+    // Only paths under the model root may be deleted and rewritten.
+    if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+      println("删除路径并开始数据写入:" + hdfsPath)
+      MyHdfsUtils.delete_hdfs_path(hdfsPath)
+      dataRdd.repartition(10).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+    } else {
+      println("路径不合法,无法写入:" + hdfsPath)
+    }
+  }
+}

+ 208 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_dssm/makedata_i2i_04_bucketFile_20241128.scala

@@ -0,0 +1,208 @@
+package com.aliyun.odps.spark.examples.makedata_dssm
+
+import com.alibaba.fastjson.JSON
+import com.aliyun.odps.TableSchema
+import com.aliyun.odps.data.Record
+import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils, env}
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.sql.SparkSession
+
+import scala.collection.JavaConversions._
+import scala.collection.mutable.ArrayBuffer
+import scala.util.Random
+
+/**
+ * Stage 04 of the DSSM i2i pipeline: derives bucket boundaries for dense features.
+ *
+ * Reads the stage-02 join output, extracts the numeric action / level-1 category / level-2
+ * category statistics of both vids of every row, and for each of the 47 feature names computes
+ * approximately equal-frequency cut points over the values greater than 1E-8. One line per
+ * feature is written to `savePath/fileName`, gzip-compressed:
+ *
+ * featureName \t bucketNum \t comma-separated boundaries   ("0" when the feature has no values)
+ */
+object makedata_i2i_04_bucketFile_20241128 {
+  /** Statistic keys read from the per-video action feature JSON (columns 6 and 7). */
+  private val actionKeys = Seq(
+    "str_day1", "rov_day1", "ros_day1", "str_day7", "rov_day7", "ros_day7",
+    "str_day21", "rov_day21", "ros_day21", "str_day336", "rov_day336", "ros_day336",
+    "vovd1_day7", "vovd1_day21", "vovd1_day336")
+
+  /** Statistic keys read from the category feature JSON (columns 8-11). */
+  private val cateKeys = Seq(
+    "str_day1", "rov_day1", "ros_day1", "str_day3", "rov_day3", "ros_day3",
+    "str_day7", "rov_day7", "ros_day7", "str_day30", "rov_day30", "ros_day30",
+    "vovd1_day1", "vovd1_day3", "vovd1_day7", "vovd1_day30")
+
+  /** Identity transfer function kept for parity with the other pipeline stages. */
+  def func(record: Record, schema: TableSchema): Record = {
+    record
+  }
+
+  /**
+   * Appends ("prefix:key", value) to `out` for every key of `keys` present in `jsonStr`.
+   * A value that cannot be read as a Double (including null) is recorded as 0.
+   * Null / empty strings and empty JSON objects contribute nothing.
+   */
+  private def collectDense(jsonStr: String, prefix: String, keys: Seq[String],
+                           out: ArrayBuffer[(String, Double)]): Unit = {
+    if (jsonStr != null && jsonStr.nonEmpty) {
+      val json = JSON.parseObject(jsonStr)
+      if (json != null && !json.isEmpty) {
+        keys.foreach { key =>
+          if (json.containsKey(key)) {
+            val value = try {
+              json.get(key).toString.toDouble
+            } catch {
+              case _: Exception => 0D
+            }
+            out += ((prefix + ":" + key, value))
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Picks cut points from an ascending array by stepping through it in strides of
+   * (len - 1) / (bucketNum - 1) + 1, skipping a point equal to the previously visited one,
+   * and always ending with the maximum value.
+   *
+   * @param sorted    values in ascending order
+   * @param bucketNum requested number of buckets; values below 2 are treated as 2 for the
+   *                  stride computation so the division can never be by zero
+   * @return strictly increasing boundaries; empty when `sorted` is empty
+   */
+  private def bucketBoundaries(sorted: Array[Double], bucketNum: Int): Seq[Double] = {
+    val len = sorted.length
+    val boundaries = new ArrayBuffer[Double]()
+    if (len > 0) {
+      // +1 guarantees a stride of at least one element.
+      val stride = (len - 1) / math.max(bucketNum - 1, 1) + 1
+      var previous = sorted(0) // value at the previously visited stride position
+      for (j <- 0 until len by stride) {
+        val current = sorted(j)
+        if (j > 0 && current != previous) {
+          boundaries += current
+        }
+        previous = current
+      }
+      // The last bucket must close at the maximum observed value.
+      if (!boundaries.contains(sorted.last)) {
+        boundaries += sorted.last
+      }
+    }
+    boundaries
+  }
+
+  /**
+   * Job entry point.
+   *
+   * Recognised arguments (all optional): readPath, savePath, fileName, bucketNum.
+   */
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // 1 Read parameters
+    val param = ParamUtils.parseArgs(args)
+    val readPath = param.getOrElse("readPath", "/dw/recommend/model/52_dssm_i2i_joinfeature/20241128*")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/54_dssm_i2i_bucketfile/")
+    val fileName = param.getOrElse("fileName", "XXXXX")
+    val bucketNum = param.getOrElse("bucketNum", "100").toInt
+    // 2 Extract (featureName, value) pairs. The RDD is scanned once per feature below, so it is
+    //   persisted to avoid re-reading and re-parsing the input on every pass.
+    val data = sc.textFile(readPath).flatMap(r=>{
+      // limit -1 keeps trailing empty columns so the column count check is reliable.
+      val rList = r.split("\t", -1)
+      val result = new ArrayBuffer[(String, Double)]()
+      // Rows that do not carry all 12 stage-02 columns are skipped instead of failing the job.
+      if (rList.length >= 12) {
+        // Left/right JSON strings that are identical are counted once (Set de-duplicates them).
+        Set(rList(6), rList(7)).toSeq.foreach(f => collectDense(f, "action", actionKeys, result))
+        Set(rList(8), rList(9)).toSeq.foreach(f => collectDense(f, "cate1", cateKeys, result))
+        Set(rList(10), rList(11)).toSeq.foreach(f => collectDense(f, "cate2", cateKeys, result))
+      }
+      result
+    }).persist(org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK)
+
+    // Output order: action features, then level-1 category, then level-2 category.
+    val contentList = actionKeys.map("action:" + _) ++ cateKeys.map("cate1:" + _) ++ cateKeys.map("cate2:" + _)
+    val result = new ArrayBuffer[String]()
+
+    // 3 Compute the boundaries feature by feature
+    for (name <- contentList) {
+      println("特征:" + name)
+      // Values at or below 1E-8 are treated as "no signal" and excluded from bucketing.
+      val data2 = data.filter(_._1.equals(name)).map(_._2).filter(_ > 1E-8).collect().sorted
+      if (data2.isEmpty) {
+        result += name + "\t" + bucketNum.toString + "\t" + "0"
+      } else {
+        result += name + "\t" + bucketNum.toString + "\t" + bucketBoundaries(data2, bucketNum).mkString(",")
+      }
+    }
+    data.unpersist()
+    val data3 = sc.parallelize(result)
+
+    // 4 Save to HDFS
+    val hdfsPath = savePath + "/" + fileName
+    // Only paths under the model root may be deleted and rewritten.
+    if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+      println("删除路径并开始数据写入:" + hdfsPath)
+      MyHdfsUtils.delete_hdfs_path(hdfsPath)
+      data3.repartition(1).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+    } else {
+      println("路径不合法,无法写入:" + hdfsPath)
+    }
+
+  }
+}

+ 588 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_dssm/makedata_i2i_05_trainData_20241129.scala

@@ -0,0 +1,588 @@
+package com.aliyun.odps.spark.examples.makedata_dssm
+
+import com.alibaba.fastjson.JSON
+import com.alibaba.fastjson.JSONObject
+import com.aliyun.odps.TableSchema
+import com.aliyun.odps.data.Record
+import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils, env}
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.sql.SparkSession
+import examples.extractor.ExtractorUtils
+import scala.collection.JavaConversions._
+import scala.collection.mutable.ArrayBuffer
+import scala.io.Source
+
+object makedata_i2i_05_trainData_20241129 {
+  /**
+   * Identity transform: hands the given record back untouched.
+   *
+   * @param record the record to return
+   * @param schema table schema of the record; not consulted
+   * @return the same `record` instance that was passed in
+   */
+  def func(record: Record, schema: TableSchema): Record = record
+  /**
+   * Spark entry point: turns joined i2i feature rows into DSSM training samples.
+   *
+   * For every hour in [beginStr, endStr] it reads tab-separated rows from `readPath/<hour>`,
+   * encodes the left and the right video with [[buildSideFeatures]] and writes
+   * `logKey \t label \t vid_left \t vid_right \t leftFeatures \t rightFeatures`
+   * (gzip-compressed) to `savePath/<hour>`.
+   *
+   * Each input row must carry at least 12 tab-separated columns:
+   * logKey, label, vid_left, vid_right, feature_left, feature_right,
+   * feature_left_action, feature_right_action, feature_left_cate1, feature_right_cate1,
+   * feature_left_cate2, feature_right_cate2 (the feature columns are JSON strings).
+   *
+   * @param args command line arguments, parsed by `ParamUtils.parseArgs`
+   */
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // 1 Read job parameters.
+    val param = ParamUtils.parseArgs(args)
+    val beginStr = param.getOrElse("beginStr", "2024062008")
+    val endStr = param.getOrElse("endStr", "2024062023")
+    val readPath = param.getOrElse("readPath", "/dw/recommend/model/52_dssm_i2i_joinfeature/")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/55_dssm_i2i_traindata/")
+    val onehotPath = param.getOrElse("onehotPath", "/dw/recommend/model/53_dssm_i2i_onehot/20241128")
+    val bucketFile = param.getOrElse("bucketFile", "20241128_recsys_i2i_bucket_47_v2.txt")
+    val repartition = param.getOrElse("repartition", "100").toInt
+    // The two parameters below are still parsed (and validated) but are not used by this job.
+    val filterHours = param.getOrElse("filterHours", "25").split(",").toSet
+    val ifDebug = param.getOrElse("ifDebug", "false").toBoolean
+
+    // 2 Load the onehot dictionary ("<feature>:<value>" -> id) and broadcast it.
+    val onehotMap_br = sc.broadcast(
+      sc.textFile(onehotPath).map(r => {
+        val rList = r.split("\t")
+        (rList(0), rList(1))
+      }).collectAsMap()
+    )
+
+    // 3 Load the dense-feature bucket file from the classpath.
+    //   Line format: <name> \t <bucketNum> \t <comma separated boundaries>.
+    //   A missing resource yields an empty map, i.e. every bucket index becomes 0.
+    val resourceUrlBucket = this.getClass.getClassLoader.getResource(bucketFile)
+    val buckets =
+      if (resourceUrlBucket != null) {
+        // Close the very Source that was read; the previous code closed a freshly opened one instead.
+        val source = Source.fromURL(resourceUrlBucket)
+        try {
+          source.getLines().mkString("\n")
+        } finally {
+          source.close()
+        }
+      } else {
+        ""
+      }
+    println(buckets)
+    val bucketsMap_br = sc.broadcast(
+      buckets.split("\n")
+        .map(r => r.replace(" ", "").replaceAll("\n", ""))
+        .filter(r => r.nonEmpty)
+        .map(r => {
+          val rList = r.split("\t")
+          (rList(0), (rList(1).toDouble, rList(2).split(",").map(_.toDouble)))
+        }).toMap
+    )
+
+    // 4 Produce the training data hour by hour.
+    val timeRange = MyDateUtils.getDateHourRange(beginStr, endStr)
+    for (dt_hh <- timeRange) {
+      val data = sc.textFile(readPath + "/" + dt_hh).mapPartitions(rows => {
+        val onehotMap = onehotMap_br.value
+        val bucketsMap = bucketsMap_br.value
+        // Lazily map the partition iterator instead of buffering every output line in memory.
+        rows.map(r => {
+          val rList = r.split("\t")
+          val logKey = rList(0)
+          val label = rList(1)
+          val vid_left = rList(2)
+          val vid_right = rList(3)
+          // Columns alternate left/right: 4/5 base, 6/7 action, 8/9 cate1, 10/11 cate2.
+          val left = buildSideFeatures(vid_left, rList(4), rList(6), rList(8), rList(10), onehotMap, bucketsMap)
+          val right = buildSideFeatures(vid_right, rList(5), rList(7), rList(9), rList(11), onehotMap, bucketsMap)
+          Seq(logKey, label, vid_left, vid_right, left, right).mkString("\t")
+        })
+      })
+
+      // 5 Save the data to HDFS (only below the model directory; the target is deleted first).
+      val hdfsPath = savePath + "/" + dt_hh
+      if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+        println("删除路径并开始数据写入:" + hdfsPath)
+        MyHdfsUtils.delete_hdfs_path(hdfsPath)
+        data.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+      } else {
+        println("路径不合法,无法写入:" + hdfsPath)
+      }
+    }
+  }
+
+  // (JSON key, onehot prefix) of the 15 sparse attributes read from the base feature JSON.
+  // Together with the vid itself this gives 16 sparse features per side.
+  private val sparseFeatureKeys: Seq[(String, String)] = Seq(
+    ("category1", "cate1:"),
+    ("category2_1", "cate2:"),
+    ("video_style", "video_style:"),
+    ("valid_time", "valid_time:"),
+    ("captions_color", "captions_color:"),
+    ("audience_age_group", "audience_age_group:"),
+    ("audience_value_type", "audience_value_type:"),
+    ("font_size", "font_size:"),
+    ("cover_persons_num", "cover_persons_num:"),
+    ("audience_gender", "audience_gender:"),
+    ("sentiment_tendency", "sentiment_tendency:"),
+    ("video_type", "video_type:"),
+    ("background_music_type", "background_music_type:"),
+    ("captions", "captions:"),
+    ("has_end_credit_guide", "has_end_credit_guide:")
+  )
+
+  // The 15 dense keys read from the per-video "action" JSON.
+  private val actionDenseKeys: Seq[String] = Seq(
+    "str_day1", "rov_day1", "ros_day1",
+    "str_day7", "rov_day7", "ros_day7",
+    "str_day21", "rov_day21", "ros_day21",
+    "str_day336", "rov_day336", "ros_day336",
+    "vovd1_day7", "vovd1_day21", "vovd1_day336"
+  )
+
+  // The 16 dense keys read from both the "cate1" and the "cate2" JSON.
+  private val cateDenseKeys: Seq[String] = Seq(
+    "str_day1", "rov_day1", "ros_day1",
+    "str_day3", "rov_day3", "ros_day3",
+    "str_day7", "rov_day7", "ros_day7",
+    "str_day30", "rov_day30", "ros_day30",
+    "vovd1_day1", "vovd1_day3", "vovd1_day7", "vovd1_day30"
+  )
+
+  /**
+   * Encodes one side (left or right video) of a sample as a comma-separated feature string.
+   *
+   * Layout (16 + 47 * 3 = 157 values):
+   *   - 16 onehot ids: vid followed by the attributes in `sparseFeatureKeys`
+   *   - 47 bucket indices (15 action + 16 cate1 + 16 cate2)
+   *   - 47 bucket-scaled scores, same order
+   *   - 47 raw scores, same order
+   *
+   * Every dense key is looked up in `bucketsMap` under "<group>:<key>", identically for both
+   * sides, so the left and the right tower always use the same bucket boundaries.
+   *
+   * @param vid           video id
+   * @param feature       JSON string with the sparse attributes
+   * @param featureAction JSON string with the "action" dense statistics
+   * @param featureCate1  JSON string with the "cate1" dense statistics
+   * @param featureCate2  JSON string with the "cate2" dense statistics
+   * @param onehotMap     onehot dictionary
+   * @param bucketsMap    bucket name -> (bucket count, boundaries)
+   * @return the encoded features joined by ","
+   */
+  private def buildSideFeatures(vid: String, feature: String, featureAction: String,
+                                featureCate1: String, featureCate2: String,
+                                onehotMap: scala.collection.Map[String, String],
+                                bucketsMap: Map[String, (Double, Array[Double])]): String = {
+    val sparse = new ArrayBuffer[String]()
+    val scaledScores = new ArrayBuffer[String]()
+    val rawScores = new ArrayBuffer[String]()
+
+    // 1 Sparse features.
+    sparse += onehotMap.getOrElse("vid:" + vid, "0")
+    val baseJson = JSON.parseObject(feature)
+    for ((jsonKey, prefix) <- sparseFeatureKeys) {
+      sparse += getOnehotValue(baseJson, onehotMap, jsonKey, prefix)
+    }
+
+    // 2 Dense features, bucketized into sparse ids plus two dense representations.
+    val denseGroups = Seq(
+      ("action", featureAction, actionDenseKeys),
+      ("cate1", featureCate1, cateDenseKeys),
+      ("cate2", featureCate2, cateDenseKeys)
+    )
+    for ((group, rawJson, keys) <- denseGroups) {
+      val json = JSON.parseObject(rawJson)
+      for (key <- keys) {
+        val (bucketIndex, scaledScore, rawScore) = getDenseBucketValue(json, bucketsMap, key, group + ":" + key)
+        sparse += bucketIndex.toString
+        scaledScores += scaledScore.toString
+        rawScores += rawScore.toString
+      }
+    }
+
+    // 3 Sparse ids first, then the scaled scores, then the raw scores.
+    (sparse ++ scaledScores ++ rawScores).mkString(",")
+  }
+
+  /**
+   * Looks up the onehot id of one sparse attribute.
+   *
+   * @param obj  JSON object holding the attribute values
+   * @param m    onehot dictionary keyed by `key2 + value`
+   * @param key1 attribute name inside `obj`
+   * @param key2 prefix prepended to the attribute value to form the dictionary key
+   * @return the dictionary entry, or "0" when `obj` has no `key1` or the dictionary has no
+   *         matching entry; a JSON null value is looked up as "无"
+   */
+  def getOnehotValue(obj: JSONObject, m: scala.collection.Map[String, String], key1: String, key2: String): String = {
+    if (!obj.containsKey(key1)) {
+      "0"
+    } else {
+      val token = obj.get(key1) match {
+        case null => "无"
+        case present => present.toString
+      }
+      m.getOrElse(key2 + token, "0")
+    }
+  }
+  /**
+   * Bucketizes one dense statistic.
+   *
+   * @param obj        JSON object holding the raw statistic
+   * @param bucketsMap bucket name -> (bucket count, boundaries)
+   * @param key1       key of the statistic inside `obj`
+   * @param name       bucket name used to look up the boundaries
+   * @return (bucket index, index / bucket count, raw score).
+   *         (0, 0, 0) when the key is absent, the value cannot be parsed as a double, or the
+   *         score is not greater than 1e-8; (0, 0, score) when `name` has no bucket definition.
+   *         The bucket index is `ExtractorUtils.findInsertPosition(boundaries, score) + 1`.
+   */
+  def getDenseBucketValue(obj: JSONObject, bucketsMap: Map[String, (Double, Array[Double])], key1: String, name: String): (Int, Double, Double) = {
+    val empty = (0, 0D, 0D)
+    if (!obj.containsKey(key1)) {
+      empty
+    } else {
+      // Any failure while reading the value (null, non-numeric text, ...) counts as a zero score.
+      val score =
+        try {
+          obj.get(key1).toString.toDouble
+        } catch {
+          case _: Exception => 0D
+        }
+      // Written as a negated ">" so that NaN is also treated as "no score".
+      if (!(score > 1E-8)) {
+        empty
+      } else {
+        bucketsMap.get(name) match {
+          case Some((bucketsNum, boundaries)) =>
+            val position = ExtractorUtils.findInsertPosition(boundaries, score).toDouble + 1.0
+            (position.toInt, 1.0 / bucketsNum * position, score)
+          case None =>
+            (0, 0D, score)
+        }
+      }
+    }
+  }
+}

+ 374 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_dssm/makedata_i2i_06_itemPred_20241206.scala

@@ -0,0 +1,374 @@
+package com.aliyun.odps.spark.examples.makedata_dssm
+
+import com.alibaba.fastjson.JSON
+import com.aliyun.odps.TableSchema
+import com.aliyun.odps.data.Record
+import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils, env}
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.sql.SparkSession
+import com.aliyun.odps.spark.examples.makedata_dssm.makedata_i2i_05_trainData_20241129.{getOnehotValue, getDenseBucketValue}
+import scala.collection.mutable
+import scala.collection.JavaConversions._
+import scala.collection.mutable.ArrayBuffer
+import scala.io.Source
+
+/**
+ * Builds the item-side prediction input for the DSSM i2i model.
+ *
+ * For every vid in `t_vid_tag_feature` one line `vid \t f1,f2,...` is written, where the
+ * feature list is, in order:
+ *   - 16 sparse ids (vid + 15 tag fields) looked up in the onehot dictionary,
+ *   - 47 bucket indices of the dense statistics (15 video-level, 16 category1, 16 category2),
+ *   - the 47 scaled bucket values,
+ *   - the 47 raw values.
+ */
+object makedata_i2i_06_itemPred_20241206 {
+
+  // (field in the tag-feature JSON, prefix used in the onehot dictionary), in output column order.
+  // Together with the leading vid column these form the 16 sparse columns.
+  private val sparseFields: Seq[(String, String)] = Seq(
+    ("category1", "cate1:"),
+    ("category2_1", "cate2:"),
+    ("video_style", "video_style:"),
+    ("valid_time", "valid_time:"),
+    ("captions_color", "captions_color:"),
+    ("audience_age_group", "audience_age_group:"),
+    ("audience_value_type", "audience_value_type:"),
+    ("font_size", "font_size:"),
+    ("cover_persons_num", "cover_persons_num:"),
+    ("audience_gender", "audience_gender:"),
+    ("sentiment_tendency", "sentiment_tendency:"),
+    ("video_type", "video_type:"),
+    ("background_music_type", "background_music_type:"),
+    ("captions", "captions:"),
+    ("has_end_credit_guide", "has_end_credit_guide:")
+  )
+
+  // Dense statistics read from the per-video action feature (bucket tables are named "action:<key>").
+  private val actionStatKeys: Seq[String] = Seq(
+    "str_day1", "rov_day1", "ros_day1",
+    "str_day7", "rov_day7", "ros_day7",
+    "str_day21", "rov_day21", "ros_day21",
+    "str_day336", "rov_day336", "ros_day336",
+    "vovd1_day7", "vovd1_day21", "vovd1_day336"
+  )
+
+  // Dense statistics read from the category1 / category2 features (bucket tables "cate1:<key>" / "cate2:<key>").
+  private val categoryStatKeys: Seq[String] = Seq(
+    "str_day1", "rov_day1", "ros_day1",
+    "str_day3", "rov_day3", "ros_day3",
+    "str_day7", "rov_day7", "ros_day7",
+    "str_day30", "rov_day30", "ros_day30",
+    "vovd1_day1", "vovd1_day3", "vovd1_day7", "vovd1_day30"
+  )
+
+  /** Identity transfer function handed to `readTable`. */
+  def func(record: Record, schema: TableSchema): Record = {
+    record
+  }
+
+  /**
+   * Reads a classpath resource as text with lines joined by "\n".
+   * Returns "" when the resource does not exist. The underlying stream is always closed.
+   */
+  private def readResourceText(resourceName: String): String = {
+    val url = this.getClass.getClassLoader.getResource(resourceName)
+    if (url == null) {
+      ""
+    } else {
+      val source = Source.fromURL(url)
+      try {
+        source.getLines().mkString("\n")
+      } finally {
+        source.close()
+      }
+    }
+  }
+
+  /**
+   * Assembles the output line of one video.
+   *
+   * @param vid           video id
+   * @param tagFeature    JSON text of the video's tag feature
+   * @param actionFeature JSON text of the video's action statistics ("{}" when there are none)
+   * @param category1     category1 name -> JSON text of its statistics
+   * @param category2     category2 name -> JSON text of its statistics
+   * @param onehotMap     onehot dictionary
+   * @param bucketsMap    bucket tables keyed by "<group>:<stat key>"
+   * @return `vid \t comma-separated features`
+   */
+  private def buildLine(vid: String,
+                        tagFeature: String,
+                        actionFeature: String,
+                        category1: scala.collection.Map[String, String],
+                        category2: scala.collection.Map[String, String],
+                        onehotMap: scala.collection.Map[String, String],
+                        bucketsMap: Map[String, (Double, Array[Double])]): String = {
+    // The tag JSON is parsed once and reused for the category lookup and the sparse fields.
+    val tagJson = JSON.parseObject(tagFeature)
+    val cate1Feature = category1.getOrElse(tagJson.getOrDefault("category1", "无").toString, "{}")
+    val cate2Feature = category2.getOrElse(tagJson.getOrDefault("category2_1", "无").toString, "{}")
+
+    val ids = new ArrayBuffer[String]()
+    val scaledValues = new ArrayBuffer[String]()
+    val rawValues = new ArrayBuffer[String]()
+
+    // 1 sparse features: vid followed by the 15 tag fields
+    ids += onehotMap.getOrElse("vid:" + vid, "0")
+    for ((field, prefix) <- sparseFields) {
+      ids += getOnehotValue(tagJson, onehotMap, field, prefix)
+    }
+
+    // 2 dense features: each statistic yields a bucket index, a scaled value and the raw value
+    val denseGroups = Seq(
+      ("action:", actionFeature, actionStatKeys),
+      ("cate1:", cate1Feature, categoryStatKeys),
+      ("cate2:", cate2Feature, categoryStatKeys)
+    )
+    for ((prefix, featureText, statKeys) <- denseGroups) {
+      val statJson = JSON.parseObject(featureText)
+      for (statKey <- statKeys) {
+        val (bucketIndex, scaled, raw) = getDenseBucketValue(statJson, bucketsMap, statKey, prefix + statKey)
+        ids += bucketIndex.toString
+        scaledValues += scaled.toString
+        rawValues += raw.toString
+      }
+    }
+
+    // 3 final layout: 16 sparse ids + 47 bucket indices, then 47 scaled values, then 47 raw values
+    Seq(vid, (ids ++ scaledValues ++ rawValues).mkString(",")).mkString("\t")
+  }
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // 1 read the job parameters
+    val param = ParamUtils.parseArgs(args)
+    val tablePart = param.getOrElse("tablePart", "64").toInt
+    val dt = param.getOrElse("dt", "20240620")
+    val onehotPath = param.getOrElse("onehotPath", "/dw/recommend/model/53_dssm_i2i_onehot/20241128")
+    val bucketFile = param.getOrElse("bucketFile", "20241128_recsys_i2i_bucket_47_v2.txt")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/56_dssm_i2i_itempredData/")
+    val project = param.getOrElse("project", "loghubods")
+    val repartition = param.getOrElse("repartition", "100").toInt
+
+    // 2 load the onehot dictionary (tab-separated key / value per line)
+    val onehotMap_br = sc.broadcast(
+      sc.textFile(onehotPath).map(r => {
+        val rList = r.split("\t")
+        (rList(0), rList(1))
+      }).collectAsMap()
+    )
+
+    // 3 load the dense bucket file: name \t bucket count \t comma-separated boundaries
+    val buckets = readResourceText(bucketFile)
+    println(buckets)
+    val bucketsMap_br = sc.broadcast(
+      buckets.split("\n")
+        .map(r => r.replace(" ", "").replaceAll("\n", ""))
+        .filter(r => r.nonEmpty)
+        .map(r => {
+          val rList = r.split("\t")
+          (rList(0), (rList(1).toDouble, rList(2).split(",").map(_.toDouble)))
+        }).toMap
+    )
+
+    // 4 read the feature tables of partition dt as (key column, feature JSON text) pairs
+    val odpsOps = env.getODPS(sc)
+    def readFeatureTable(table: String, keyColumn: String) =
+      odpsOps.readTable(project = project,
+          table = table,
+          partition = s"dt=$dt",
+          transfer = func,
+          numPartition = tablePart)
+        .map(record => (record.getString(keyColumn), record.getString("feature")))
+
+    // category statistics are collected on the driver and broadcast
+    val category1_br = sc.broadcast(readFeatureTable("t_vid_l1_cat_stat_feature", "category1").collectAsMap())
+    val category2_br = sc.broadcast(readFeatureTable("t_vid_l2_cat_stat_feature", "category2").collectAsMap())
+    // per-video features are combined with a join
+    val vidStaticFeature = readFeatureTable("t_vid_tag_feature", "vid")
+    val vidActionFeature = readFeatureTable("t_vid_stat_feature", "vid")
+
+    // 5 build one output line per video; rows are streamed instead of buffered per partition
+    val data = vidStaticFeature.leftOuterJoin(vidActionFeature).mapPartitions(rows => {
+      val category1 = category1_br.value
+      val category2 = category2_br.value
+      val onehotMap = onehotMap_br.value
+      val bucketsMap = bucketsMap_br.value
+      rows.map {
+        case (vid, (tagFeature, actionFeature)) =>
+          // videos without action statistics get an empty JSON object
+          buildLine(vid, tagFeature, actionFeature.getOrElse("{}"), category1, category2, onehotMap, bucketsMap)
+      }
+    })
+
+    // 6 save to hdfs; only paths under /dw/recommend/model/ may be deleted and rewritten
+    val hdfsPath = savePath + "/" + dt
+    if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+      println("删除路径并开始数据写入:" + hdfsPath)
+      MyHdfsUtils.delete_hdfs_path(hdfsPath)
+      data.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+    } else {
+      println("路径不合法,无法写入:" + hdfsPath)
+    }
+  }
+}

+ 280 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_61_originData_20241209.scala

@@ -0,0 +1,280 @@
+package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
+
+import com.alibaba.fastjson.{JSON, JSONObject}
+import com.aliyun.odps.TableSchema
+import com.aliyun.odps.data.Record
+import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils, env}
+import examples.extractor.RankExtractorFeature_20240530
+import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.sql.SparkSession
+import org.xm.Similarity
+
+import scala.collection.JavaConversions._
+import scala.collection.mutable.ArrayBuffer
+/*
+
+ */
+
+/**
+ * Exports hourly samples as `logKey \t labels(JSON) \t features(JSON)`.
+ *
+ * Features produced by this job:
+ *   - video statistics: for each of 5 column groups and 8 time windows
+ *     (1h 2h 3h 4h 12h 1d 3d 7d), the 6 values STR, log(share), ROV, log(return),
+ *     ROV*log(return) and ROS, i.e. 5 * 8 * 6 = 240 values;
+ *   - video info: total_time and bit_rate;
+ *   - user: play counts over 6h/1d/3d/7d (4 values) and share pv / return uv over
+ *     12h/1d/3d/7d (8 values);
+ *   - user tags vs. video title: for 5 tag columns and 3 windows (1d 3d 7d), the match count,
+ *     highest similarity and average similarity (up to 45 values);
+ *   - user cf vs. vid: for 2 columns and 2 actions (share, return), the score, the count and
+ *     the reciprocal of the rank (up to 12 values);
+ *   - head video: exp, return_n and rovn.
+ * The original notes also list scene features (hour, weekday, apptype, city, province,
+ * pagesource, device model); they are not produced by this code.
+ */
+object makedata_recsys_61_originData_20241209 {
+
+  // Video statistic columns (without the "_feature" suffix), in the order they are parsed.
+  private val videoStatColumns: Seq[String] = Seq(
+    "b1", "b2", "b3", "b6", "b7", "b8", "b9", "b10", "b11", "b12", "b13", "b17", "b18", "b19"
+  )
+
+  // (exposure column, share column, return column, feature-name prefix)
+  private val videoStatGroups: Seq[(String, String, String, String)] = Seq(
+    ("b1", "b2", "b3", "b123"), ("b1", "b6", "b7", "b167"),
+    ("b8", "b9", "b10", "b8910"), ("b11", "b12", "b13", "b111213"),
+    ("b17", "b18", "b19", "b171819")
+  )
+
+  private val timeWindows: Seq[String] = Seq("1h", "2h", "3h", "4h", "12h", "1d", "3d", "7d")
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName(this.getClass.getName)
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // 1 read the job parameters
+    val param = ParamUtils.parseArgs(args)
+    val tablePart = param.getOrElse("tablePart", "64").toInt
+    val beginStr = param.getOrElse("beginStr", "2023010100")
+    val endStr = param.getOrElse("endStr", "2023010123")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/61_origin_data/")
+    val project = param.getOrElse("project", "loghubods")
+    val table = param.getOrElse("table", "XXXX")
+    val repartition = param.getOrElse("repartition", "32").toInt
+
+    // 2 odps access
+    val odpsOps = env.getODPS(sc)
+
+    // 3 produce the data hour by hour
+    val timeRange = MyDateUtils.getDateHourRange(beginStr, endStr)
+    for (dt_hh <- timeRange) {
+      val dt = dt_hh.substring(0, 8)
+      val hh = dt_hh.substring(8, 10)
+      val partition = s"dt=$dt,hh=$hh"
+      println("开始执行partiton:" + partition)
+      val odpsData = odpsOps.readTable(project = project,
+          table = table,
+          partition = partition,
+          transfer = func,
+          numPartition = tablePart)
+        .map(record => buildSampleLine(record))
+
+      // 4 save to hdfs; only paths under /dw/recommend/model/ may be deleted and rewritten
+      val savePartition = dt + hh
+      val hdfsPath = savePath + "/" + savePartition
+      if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
+        println("删除路径并开始数据写入:" + hdfsPath)
+        MyHdfsUtils.delete_hdfs_path(hdfsPath)
+        odpsData.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+      } else {
+        println("路径不合法,无法写入:" + hdfsPath)
+      }
+    }
+  }
+
+  /** Identity transfer function handed to `readTable`. */
+  def func(record: Record, schema: TableSchema): Record = {
+    record
+  }
+
+  /** Parses a JSON column of the record; a null column yields an empty object. */
+  private def jsonColumn(record: Record, column: String): JSONObject = {
+    if (record.isNull(column)) new JSONObject() else JSON.parseObject(record.getString(column))
+  }
+
+  /** Integer value of `key` as a double, or 0 when the key is absent. */
+  private def intOrZero(json: JSONObject, key: String): Double = {
+    if (json.containsKey(key)) json.getIntValue(key).toDouble else 0D
+  }
+
+  /** Turns one table record into the output line `logKey \t labels \t features`. */
+  private def buildSampleLine(record: Record): String = {
+    val featureMap = new JSONObject()
+
+    // a video statistics: every column is parsed once, before any feature is derived
+    val videoStats: Map[String, JSONObject] =
+      videoStatColumns.map(name => name -> jsonColumn(record, name + "_feature")).toMap
+    for ((expColumn, shareColumn, returnColumn, groupName) <- videoStatGroups) {
+      val expJson = videoStats(expColumn)
+      val shareJson = videoStats(shareColumn)
+      val returnJson = videoStats(returnColumn)
+      for (window <- timeWindows) {
+        val exp = if (expJson.isEmpty) 0D else expJson.getIntValue("exp_pv_" + window).toDouble
+        val share = if (shareJson.isEmpty) 0D else shareJson.getIntValue("share_pv_" + window).toDouble
+        val returns = if (returnJson.isEmpty) 0D else returnJson.getIntValue("return_uv_" + window).toDouble
+        val str = RankExtractorFeature_20240530.calDiv(share, exp)
+        val logShare = RankExtractorFeature_20240530.calLog(share)
+        val rov = RankExtractorFeature_20240530.calDiv(returns, exp)
+        val logReturn = RankExtractorFeature_20240530.calLog(returns)
+        val rovLogReturn = rov * logReturn
+        val ros = RankExtractorFeature_20240530.calDiv(returns, share)
+        val keyPrefix = groupName + "_" + window + "_"
+        featureMap.put(keyPrefix + "STR", str)
+        featureMap.put(keyPrefix + "log(share)", logShare)
+        featureMap.put(keyPrefix + "ROV", rov)
+        featureMap.put(keyPrefix + "log(return)", logReturn)
+        featureMap.put(keyPrefix + "ROV*log(return)", rovLogReturn)
+        featureMap.put(keyPrefix + "ROS", ros)
+      }
+    }
+
+    // b video info: always written, 0 when the field is missing
+    val videoInfo = jsonColumn(record, "t_v_info_feature")
+    for (field <- Seq("total_time", "bit_rate")) {
+      featureMap.put(field, intOrZero(videoInfo, field))
+    }
+
+    // c user counters: written only when the source column is not empty
+    val playStats = jsonColumn(record, "c1_feature")
+    if (!playStats.isEmpty) {
+      for (field <- Seq("playcnt_6h", "playcnt_1d", "playcnt_3d", "playcnt_7d")) {
+        featureMap.put(field, intOrZero(playStats, field))
+      }
+    }
+    val shareReturnStats = jsonColumn(record, "c2_feature")
+    if (!shareReturnStats.isEmpty) {
+      for (field <- Seq(
+        "share_pv_12h", "share_pv_1d", "share_pv_3d", "share_pv_7d",
+        "return_uv_12h", "return_uv_1d", "return_uv_3d", "return_uv_7d"
+      )) {
+        featureMap.put(field, intOrZero(shareReturnStats, field))
+      }
+    }
+
+    // d user tags against the video title; skipped when the video has no title
+    val title = if (videoInfo.containsKey("title")) videoInfo.getString("title") else ""
+    if (!title.equals("")) {
+      for (column <- Seq("c3_feature", "c4_feature", "c5_feature", "c6_feature", "c7_feature")) {
+        val tagJson = jsonColumn(record, column)
+        for (window <- Seq("tags_1d", "tags_3d", "tags_7d")) {
+          val tags = if (tagJson.containsKey(window)) tagJson.getString(window) else ""
+          if (!tags.equals("")) {
+            // the list of matched tags is computed but not exported
+            val (matchNum, _, maxScore, avgScore) = funcC34567ForTags(tags, title)
+            featureMap.put(column + "_" + window + "_matchnum", matchNum)
+            featureMap.put(column + "_" + window + "_maxscore", maxScore)
+            featureMap.put(column + "_" + window + "_avgscore", avgScore)
+          }
+        }
+      }
+    }
+
+    // e user cf lists against the vid; entries are "id:score:num:rank" separated by commas
+    val vid = if (record.isNull("vid")) "" else record.getString("vid")
+    if (!vid.equals("")) {
+      for (column <- Seq("c8_feature", "c9_feature")) {
+        val cfJson = jsonColumn(record, column)
+        for (action <- Seq("share", "return")) {
+          val cfListStr = if (cfJson.containsKey(action)) cfJson.getString(action) else ""
+          if (!cfListStr.equals("")) {
+            val cfMap = cfListStr.split(",").map(entry => {
+              val parts = entry.split(":")
+              (parts(0), (parts(1), parts(2), parts(3)))
+            }).toMap
+            if (cfMap.contains(vid)) {
+              val (score, num, rank) = cfMap(vid)
+              featureMap.put(column + "_" + action + "_score", score.toDouble)
+              featureMap.put(column + "_" + action + "_num", num.toDouble)
+              featureMap.put(column + "_" + action + "_rank", 1.0 / rank.toDouble)
+            }
+          }
+        }
+      }
+    }
+
+    // f head video statistics
+    val headStats = jsonColumn(record, "d1_feature")
+    if (!headStats.isEmpty) {
+      for (field <- Seq("exp", "return_n", "rovn")) {
+        featureMap.put("d1_" + field, if (headStats.containsKey(field)) headStats.getString(field).toDouble else 0D)
+      }
+    }
+
+    // 4 labels: only the columns that are not null are copied
+    val labels = new JSONObject
+    for (labelKey <- Seq(
+      "is_play", "is_share", "is_return", "noself_is_return", "return_uv", "noself_return_uv", "total_return_uv",
+      "share_pv", "total_share_uv"
+    )) {
+      if (!record.isNull(labelKey)) {
+        labels.put(labelKey, record.getString(labelKey))
+      }
+    }
+
+    // 5 log key header (vid was already read above)
+    val apptype = record.getString("apptype")
+    val pagesource = record.getString("pagesource")
+    val mid = record.getString("mid")
+    val ts = record.getString("ts")
+    val abcode = record.getString("abcode")
+    val level = if (record.isNull("level")) "0" else record.getString("level")
+    val logKey = Seq(apptype, pagesource, mid, vid, ts, abcode, level).mkString(",")
+
+    // 6 join the three parts
+    Seq(logKey, labels.toString(), featureMap.toString()).mkString("\t")
+  }
+
+  /**
+   * Compares a comma-separated tag list with a title.
+   *
+   * @param tags  comma-separated tags
+   * @param title video title
+   * @return (number of tags contained in the title, those tags joined by ",",
+   *         highest similarity score, average similarity score over all tags)
+   */
+  def funcC34567ForTags(tags: String, title: String): Tuple4[Double, String, Double, Double] = {
+    val tagsList = tags.split(",")
+    // One pass over the tags carrying (matched tags, best score so far, score sum).
+    val (matched, best, total) = tagsList.foldLeft((Vector.empty[String], 0.0, 0.0)) {
+      case ((hits, top, sum), tag) =>
+        val nextHits = if (title.contains(tag)) hits :+ tag else hits
+        val score: Double = Similarity.conceptSimilarity(tag, title)
+        (nextHits, if (score > top) score else top, sum + score)
+    }
+    val average = if (tagsList.nonEmpty) total / tagsList.size else total
+    (matched.size.toDouble, matched.mkString(","), best, average)
+  }
+}

+ 73 - 0
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本-I2I

@@ -0,0 +1,73 @@
+nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_dssm.makedata_i2i_01_originData_20241127 \
+--master yarn --driver-memory 2G --executor-memory 2G --executor-cores 1 --num-executors 16 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+beginStr:2024113000 endStr:2024113023 negCnt:20 \
+tablePart:64 savePath:/dw/recommend/model/51_dssm_i2i_sample/ > p51_2.log 2>&1 &
+
+nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_dssm.makedata_i2i_02_joinFeatureData_20241128 \
+--master yarn --driver-memory 2G --executor-memory 2G --executor-cores 1 --num-executors 32 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+beginStr:2024113000 endStr:2024113023 \
+tablePart:64 \
+readPath:/dw/recommend/model/51_dssm_i2i_sample/ \
+savePath:/dw/recommend/model/52_dssm_i2i_joinfeature/ > p52_2.log 2>&1 &
+
+nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_dssm.makedata_i2i_03_onehotFile_20241128 \
+--master yarn --driver-memory 16G --executor-memory 1G --executor-cores 1 --num-executors 32 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+dt:20241201 \
+tablePart:64 \
+readPath:/dw/recommend/model/53_dssm_i2i_onehot/after_20241201_file \
+savePath:/dw/recommend/model/53_dssm_i2i_onehot/after_20241201_file > p53.log 2>&1 &
+
+
+
+
+数据量:3544265
+打印各个特征多少枚举值:
+vid       3534920
+video_style     7382
+captions_color  782
+valid_time      754
+audience_age_group      80
+cate2   67
+audience_value_type     65
+font_size       49
+audience_gender 47
+cover_persons_num       45
+cate1   42
+sentiment_tendency      11
+video_type      8
+background_music_type   6
+captions        3
+has_end_credit_guide    2
+timeliness      2
+
+nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_dssm.makedata_i2i_04_bucketFile_20241128 \
+--master yarn --driver-memory 16G --executor-memory 2G --executor-cores 1 --num-executors 32 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+readPath:/dw/recommend/model/52_dssm_i2i_joinfeature/20241126* \
+savePath:/dw/recommend/model/54_dssm_i2i_bucketfile/ \
+fileName:47_rate_v2  bucketNum:100 > p54.log 2>&1 &
+
+nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_dssm.makedata_i2i_05_trainData_20241129 \
+--master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 32 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+beginStr:2024113000 endStr:2024113023 \
+readPath:/dw/recommend/model/52_dssm_i2i_joinfeature/ \
+savePath:/dw/recommend/model/55_dssm_i2i_traindata/ \
+onehotPath:/dw/recommend/model/53_dssm_i2i_onehot/after_20241201_file \
+bucketFile:20241128_recsys_i2i_bucket_47_v2.txt > p55_2.log 2>&1 &
+
+nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata_dssm.makedata_i2i_06_itemPred_20241206 \
+--master yarn --driver-memory 2G --executor-memory 2G --executor-cores 1 --num-executors 16 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+onehotPath:/dw/recommend/model/53_dssm_i2i_onehot/after_20241201_file \
+bucketFile:20241128_recsys_i2i_bucket_47_v2.txt repartition:100 \
+dt:20241206 \
+savePath:/dw/recommend/model/56_dssm_i2i_itempredData/ \
+> p56.log 2>&1 &