Procházet zdrojové kódy

推荐样本生产-特征分桶

zhangbo před 10 měsíci
rodič
revize
a7f076d4be

+ 258 - 2
src/main/resources/20240608_feature_name.txt

@@ -1,18 +1,274 @@
-total_time
-bit_rate
+b123_1h_STR
+b123_1h_log(share)
+b123_1h_ROV
+b123_1h_log(return)
+b123_1h_ROV*log(return)
+b123_2h_STR
+b123_2h_log(share)
+b123_2h_ROV
+b123_2h_log(return)
+b123_2h_ROV*log(return)
+b123_3h_STR
+b123_3h_log(share)
+b123_3h_ROV
+b123_3h_log(return)
+b123_3h_ROV*log(return)
+b123_4h_STR
+b123_4h_log(share)
+b123_4h_ROV
+b123_4h_log(return)
+b123_4h_ROV*log(return)
+b123_12h_STR
+b123_12h_log(share)
+b123_12h_ROV
+b123_12h_log(return)
+b123_12h_ROV*log(return)
+b123_1d_STR
+b123_1d_log(share)
+b123_1d_ROV
+b123_1d_log(return)
+b123_1d_ROV*log(return)
+b123_3d_STR
+b123_3d_log(share)
+b123_3d_ROV
+b123_3d_log(return)
+b123_3d_ROV*log(return)
 b123_7d_STR
+b123_7d_log(share)
+b123_7d_ROV
+b123_7d_log(return)
+b123_7d_ROV*log(return)
+b167_1h_STR
+b167_1h_log(share)
+b167_1h_ROV
+b167_1h_log(return)
+b167_1h_ROV*log(return)
+b167_2h_STR
+b167_2h_log(share)
+b167_2h_ROV
+b167_2h_log(return)
+b167_2h_ROV*log(return)
+b167_3h_STR
+b167_3h_log(share)
+b167_3h_ROV
+b167_3h_log(return)
+b167_3h_ROV*log(return)
+b167_4h_STR
+b167_4h_log(share)
+b167_4h_ROV
+b167_4h_log(return)
+b167_4h_ROV*log(return)
+b167_12h_STR
+b167_12h_log(share)
+b167_12h_ROV
+b167_12h_log(return)
+b167_12h_ROV*log(return)
+b167_1d_STR
+b167_1d_log(share)
+b167_1d_ROV
+b167_1d_log(return)
+b167_1d_ROV*log(return)
+b167_3d_STR
+b167_3d_log(share)
+b167_3d_ROV
+b167_3d_log(return)
+b167_3d_ROV*log(return)
+b167_7d_STR
 b167_7d_log(share)
+b167_7d_ROV
+b167_7d_log(return)
+b167_7d_ROV*log(return)
+b8910_1h_STR
+b8910_1h_log(share)
+b8910_1h_ROV
+b8910_1h_log(return)
+b8910_1h_ROV*log(return)
+b8910_2h_STR
+b8910_2h_log(share)
+b8910_2h_ROV
+b8910_2h_log(return)
+b8910_2h_ROV*log(return)
+b8910_3h_STR
+b8910_3h_log(share)
+b8910_3h_ROV
+b8910_3h_log(return)
+b8910_3h_ROV*log(return)
+b8910_4h_STR
+b8910_4h_log(share)
+b8910_4h_ROV
+b8910_4h_log(return)
+b8910_4h_ROV*log(return)
+b8910_12h_STR
+b8910_12h_log(share)
+b8910_12h_ROV
+b8910_12h_log(return)
+b8910_12h_ROV*log(return)
+b8910_1d_STR
+b8910_1d_log(share)
+b8910_1d_ROV
+b8910_1d_log(return)
+b8910_1d_ROV*log(return)
+b8910_3d_STR
+b8910_3d_log(share)
+b8910_3d_ROV
+b8910_3d_log(return)
+b8910_3d_ROV*log(return)
+b8910_7d_STR
+b8910_7d_log(share)
 b8910_7d_ROV
+b8910_7d_log(return)
+b8910_7d_ROV*log(return)
+b111213_1h_STR
+b111213_1h_log(share)
+b111213_1h_ROV
+b111213_1h_log(return)
+b111213_1h_ROV*log(return)
+b111213_2h_STR
+b111213_2h_log(share)
+b111213_2h_ROV
+b111213_2h_log(return)
+b111213_2h_ROV*log(return)
+b111213_3h_STR
+b111213_3h_log(share)
+b111213_3h_ROV
+b111213_3h_log(return)
+b111213_3h_ROV*log(return)
+b111213_4h_STR
+b111213_4h_log(share)
+b111213_4h_ROV
+b111213_4h_log(return)
+b111213_4h_ROV*log(return)
+b111213_12h_STR
+b111213_12h_log(share)
+b111213_12h_ROV
+b111213_12h_log(return)
+b111213_12h_ROV*log(return)
+b111213_1d_STR
+b111213_1d_log(share)
+b111213_1d_ROV
+b111213_1d_log(return)
+b111213_1d_ROV*log(return)
+b111213_3d_STR
+b111213_3d_log(share)
+b111213_3d_ROV
+b111213_3d_log(return)
+b111213_3d_ROV*log(return)
+b111213_7d_STR
+b111213_7d_log(share)
+b111213_7d_ROV
 b111213_7d_log(return)
+b111213_7d_ROV*log(return)
+b171819_1h_STR
+b171819_1h_log(share)
+b171819_1h_ROV
+b171819_1h_log(return)
+b171819_1h_ROV*log(return)
+b171819_2h_STR
+b171819_2h_log(share)
+b171819_2h_ROV
+b171819_2h_log(return)
+b171819_2h_ROV*log(return)
+b171819_3h_STR
+b171819_3h_log(share)
+b171819_3h_ROV
+b171819_3h_log(return)
+b171819_3h_ROV*log(return)
+b171819_4h_STR
+b171819_4h_log(share)
+b171819_4h_ROV
+b171819_4h_log(return)
+b171819_4h_ROV*log(return)
+b171819_12h_STR
+b171819_12h_log(share)
+b171819_12h_ROV
+b171819_12h_log(return)
+b171819_12h_ROV*log(return)
+b171819_1d_STR
+b171819_1d_log(share)
+b171819_1d_ROV
+b171819_1d_log(return)
+b171819_1d_ROV*log(return)
+b171819_3d_STR
+b171819_3d_log(share)
+b171819_3d_ROV
+b171819_3d_log(return)
+b171819_3d_ROV*log(return)
+b171819_7d_STR
+b171819_7d_log(share)
+b171819_7d_ROV
+b171819_7d_log(return)
 b171819_7d_ROV*log(return)
+total_time
+bit_rate
+playcnt_6h
+playcnt_1d
+playcnt_3d
 playcnt_7d
+share_pv_12h
+share_pv_1d
+share_pv_3d
 share_pv_7d
+return_uv_12h
+return_uv_1d
+return_uv_3d
 return_uv_7d
+c3_feature_tags_1d_matchnum
+c3_feature_tags_1d_maxscore
+c3_feature_tags_1d_avgscore
+c3_feature_tags_3d_matchnum
+c3_feature_tags_3d_maxscore
+c3_feature_tags_3d_avgscore
+c3_feature_tags_7d_matchnum
 c3_feature_tags_7d_maxscore
+c3_feature_tags_7d_avgscore
+c4_feature_tags_1d_matchnum
+c4_feature_tags_1d_maxscore
+c4_feature_tags_1d_avgscore
+c4_feature_tags_3d_matchnum
+c4_feature_tags_3d_maxscore
+c4_feature_tags_3d_avgscore
+c4_feature_tags_7d_matchnum
 c4_feature_tags_7d_maxscore
+c4_feature_tags_7d_avgscore
+c5_feature_tags_1d_matchnum
+c5_feature_tags_1d_maxscore
+c5_feature_tags_1d_avgscore
+c5_feature_tags_3d_matchnum
+c5_feature_tags_3d_maxscore
+c5_feature_tags_3d_avgscore
+c5_feature_tags_7d_matchnum
 c5_feature_tags_7d_maxscore
+c5_feature_tags_7d_avgscore
+c6_feature_tags_1d_matchnum
+c6_feature_tags_1d_maxscore
+c6_feature_tags_1d_avgscore
+c6_feature_tags_3d_matchnum
+c6_feature_tags_3d_maxscore
+c6_feature_tags_3d_avgscore
+c6_feature_tags_7d_matchnum
 c6_feature_tags_7d_maxscore
+c6_feature_tags_7d_avgscore
+c7_feature_tags_1d_matchnum
+c7_feature_tags_1d_maxscore
+c7_feature_tags_1d_avgscore
+c7_feature_tags_3d_matchnum
+c7_feature_tags_3d_maxscore
+c7_feature_tags_3d_avgscore
+c7_feature_tags_7d_matchnum
 c7_feature_tags_7d_maxscore
+c7_feature_tags_7d_avgscore
 c8_feature_share_score
+c8_feature_share_num
+c8_feature_share_rank
+c8_feature_return_score
+c8_feature_return_num
+c8_feature_return_rank
+c9_feature_share_score
+c9_feature_share_num
+c9_feature_share_rank
 c9_feature_return_score
+c9_feature_return_num
+c9_feature_return_rank
+d1_exp
+d1_return_n
 d1_rovn

+ 5 - 3
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_13_originData_20240529.scala

@@ -186,8 +186,7 @@ object makedata_13_originData_20240529 {
 
 
           /*
-          视频:
-          视频时长、比特率
+
 
           视频:
           曝光使用pv 分享使用pv 回流使用uv --> 1h 2h 3h 4h 12h 1d 3d 7d
@@ -196,11 +195,14 @@ object makedata_13_originData_20240529 {
           整体、整体曝光对应、推荐非冷启root、推荐冷启root、分省份root
           200个特征值
 
+          视频:
+          视频时长、比特率
+
           人:
           播放次数 --> 6h 1d 3d 7d --> 4个
           带回来的分享pv 回流uv --> 12h 1d 3d 7d --> 8个
           人+vid-title:
-          播放点/回流点/分享点/累积分享/累积回流 --> 1d 3d 7d --> 匹配数量 匹配词 语义最高相似度分 语义平均相似度分 --> 60
+          播放点/回流点/分享点/累积分享/累积回流 --> 1d 3d 7d --> 匹配数量 语义最高相似度分 语义平均相似度分 --> 45
           人+vid-cf
           基于分享行为/基于回流行为 --> “分享cf”+“回流点击cf” 相似分 相似数量 相似rank的倒数 --> 12个