Browse Source

广告新增特征

zhangbo 9 months ago
parent
commit
04e68b793d

+ 351 - 0
src/main/resources/20240703_ad_feature_name.txt

@@ -0,0 +1,351 @@
+cpa
+b2_3h_ctr
+b2_3h_ctcvr
+b2_3h_cvr
+b2_3h_conver
+b2_3h_ecpm
+b2_3h_click
+b2_3h_conver*log(view)
+b2_3h_conver*ctcvr
+b2_6h_ctr
+b2_6h_ctcvr
+b2_6h_cvr
+b2_6h_conver
+b2_6h_ecpm
+b2_6h_click
+b2_6h_conver*log(view)
+b2_6h_conver*ctcvr
+b2_12h_ctr
+b2_12h_ctcvr
+b2_12h_cvr
+b2_12h_conver
+b2_12h_ecpm
+b2_12h_click
+b2_12h_conver*log(view)
+b2_12h_conver*ctcvr
+b2_1d_ctr
+b2_1d_ctcvr
+b2_1d_cvr
+b2_1d_conver
+b2_1d_ecpm
+b2_1d_click
+b2_1d_conver*log(view)
+b2_1d_conver*ctcvr
+b2_3d_ctr
+b2_3d_ctcvr
+b2_3d_cvr
+b2_3d_conver
+b2_3d_ecpm
+b2_3d_click
+b2_3d_conver*log(view)
+b2_3d_conver*ctcvr
+b2_7d_ctr
+b2_7d_ctcvr
+b2_7d_cvr
+b2_7d_conver
+b2_7d_ecpm
+b2_7d_click
+b2_7d_conver*log(view)
+b2_7d_conver*ctcvr
+b3_3h_ctr
+b3_3h_ctcvr
+b3_3h_cvr
+b3_3h_conver
+b3_3h_ecpm
+b3_3h_click
+b3_3h_conver*log(view)
+b3_3h_conver*ctcvr
+b3_6h_ctr
+b3_6h_ctcvr
+b3_6h_cvr
+b3_6h_conver
+b3_6h_ecpm
+b3_6h_click
+b3_6h_conver*log(view)
+b3_6h_conver*ctcvr
+b3_12h_ctr
+b3_12h_ctcvr
+b3_12h_cvr
+b3_12h_conver
+b3_12h_ecpm
+b3_12h_click
+b3_12h_conver*log(view)
+b3_12h_conver*ctcvr
+b3_1d_ctr
+b3_1d_ctcvr
+b3_1d_cvr
+b3_1d_conver
+b3_1d_ecpm
+b3_1d_click
+b3_1d_conver*log(view)
+b3_1d_conver*ctcvr
+b3_3d_ctr
+b3_3d_ctcvr
+b3_3d_cvr
+b3_3d_conver
+b3_3d_ecpm
+b3_3d_click
+b3_3d_conver*log(view)
+b3_3d_conver*ctcvr
+b3_7d_ctr
+b3_7d_ctcvr
+b3_7d_cvr
+b3_7d_conver
+b3_7d_ecpm
+b3_7d_click
+b3_7d_conver*log(view)
+b3_7d_conver*ctcvr
+b4_3h_ctr
+b4_3h_ctcvr
+b4_3h_cvr
+b4_3h_conver
+b4_3h_ecpm
+b4_3h_click
+b4_3h_conver*log(view)
+b4_3h_conver*ctcvr
+b4_6h_ctr
+b4_6h_ctcvr
+b4_6h_cvr
+b4_6h_conver
+b4_6h_ecpm
+b4_6h_click
+b4_6h_conver*log(view)
+b4_6h_conver*ctcvr
+b4_12h_ctr
+b4_12h_ctcvr
+b4_12h_cvr
+b4_12h_conver
+b4_12h_ecpm
+b4_12h_click
+b4_12h_conver*log(view)
+b4_12h_conver*ctcvr
+b4_1d_ctr
+b4_1d_ctcvr
+b4_1d_cvr
+b4_1d_conver
+b4_1d_ecpm
+b4_1d_click
+b4_1d_conver*log(view)
+b4_1d_conver*ctcvr
+b4_3d_ctr
+b4_3d_ctcvr
+b4_3d_cvr
+b4_3d_conver
+b4_3d_ecpm
+b4_3d_click
+b4_3d_conver*log(view)
+b4_3d_conver*ctcvr
+b4_7d_ctr
+b4_7d_ctcvr
+b4_7d_cvr
+b4_7d_conver
+b4_7d_ecpm
+b4_7d_click
+b4_7d_conver*log(view)
+b4_7d_conver*ctcvr
+b5_3h_ctr
+b5_3h_ctcvr
+b5_3h_cvr
+b5_3h_conver
+b5_3h_ecpm
+b5_3h_click
+b5_3h_conver*log(view)
+b5_3h_conver*ctcvr
+b5_6h_ctr
+b5_6h_ctcvr
+b5_6h_cvr
+b5_6h_conver
+b5_6h_ecpm
+b5_6h_click
+b5_6h_conver*log(view)
+b5_6h_conver*ctcvr
+b5_12h_ctr
+b5_12h_ctcvr
+b5_12h_cvr
+b5_12h_conver
+b5_12h_ecpm
+b5_12h_click
+b5_12h_conver*log(view)
+b5_12h_conver*ctcvr
+b5_1d_ctr
+b5_1d_ctcvr
+b5_1d_cvr
+b5_1d_conver
+b5_1d_ecpm
+b5_1d_click
+b5_1d_conver*log(view)
+b5_1d_conver*ctcvr
+b5_3d_ctr
+b5_3d_ctcvr
+b5_3d_cvr
+b5_3d_conver
+b5_3d_ecpm
+b5_3d_click
+b5_3d_conver*log(view)
+b5_3d_conver*ctcvr
+b5_7d_ctr
+b5_7d_ctcvr
+b5_7d_cvr
+b5_7d_conver
+b5_7d_ecpm
+b5_7d_click
+b5_7d_conver*log(view)
+b5_7d_conver*ctcvr
+b8_3h_ctr
+b8_3h_ctcvr
+b8_3h_cvr
+b8_3h_conver
+b8_3h_ecpm
+b8_3h_click
+b8_3h_conver*log(view)
+b8_3h_conver*ctcvr
+b8_6h_ctr
+b8_6h_ctcvr
+b8_6h_cvr
+b8_6h_conver
+b8_6h_ecpm
+b8_6h_click
+b8_6h_conver*log(view)
+b8_6h_conver*ctcvr
+b8_12h_ctr
+b8_12h_ctcvr
+b8_12h_cvr
+b8_12h_conver
+b8_12h_ecpm
+b8_12h_click
+b8_12h_conver*log(view)
+b8_12h_conver*ctcvr
+b8_1d_ctr
+b8_1d_ctcvr
+b8_1d_cvr
+b8_1d_conver
+b8_1d_ecpm
+b8_1d_click
+b8_1d_conver*log(view)
+b8_1d_conver*ctcvr
+b8_3d_ctr
+b8_3d_ctcvr
+b8_3d_cvr
+b8_3d_conver
+b8_3d_ecpm
+b8_3d_click
+b8_3d_conver*log(view)
+b8_3d_conver*ctcvr
+b8_7d_ctr
+b8_7d_ctcvr
+b8_7d_cvr
+b8_7d_conver
+b8_7d_ecpm
+b8_7d_click
+b8_7d_conver*log(view)
+b8_7d_conver*ctcvr
+b6_7d_ctr
+b6_7d_ctcvr
+b6_7d_cvr
+b6_7d_conver
+b6_7d_ecpm
+b6_7d_click
+b6_7d_conver*log(view)
+b6_7d_conver*ctcvr
+b6_14d_ctr
+b6_14d_ctcvr
+b6_14d_cvr
+b6_14d_conver
+b6_14d_ecpm
+b6_14d_click
+b6_14d_conver*log(view)
+b6_14d_conver*ctcvr
+b7_7d_ctr
+b7_7d_ctcvr
+b7_7d_cvr
+b7_7d_conver
+b7_7d_ecpm
+b7_7d_click
+b7_7d_conver*log(view)
+b7_7d_conver*ctcvr
+b7_14d_ctr
+b7_14d_ctcvr
+b7_14d_cvr
+b7_14d_conver
+b7_14d_ecpm
+b7_14d_click
+b7_14d_conver*log(view)
+b7_14d_conver*ctcvr
+viewAll
+clickAll
+converAll
+incomeAll
+ctr_all
+ctcvr_all
+cvr_all
+ecpm_all
+timediff_view
+timediff_click
+timediff_conver
+actionstatic_view
+actionstatic_click
+actionstatic_conver
+actionstatic_income
+actionstatic_ctr
+actionstatic_ctcvr
+actionstatic_cvr
+e1_tags_3d_matchnum
+e1_tags_3d_maxscore
+e1_tags_3d_avgscore
+e1_tags_7d_matchnum
+e1_tags_7d_maxscore
+e1_tags_7d_avgscore
+e1_tags_14d_matchnum
+e1_tags_14d_maxscore
+e1_tags_14d_avgscore
+e2_tags_3d_matchnum
+e2_tags_3d_maxscore
+e2_tags_3d_avgscore
+e2_tags_7d_matchnum
+e2_tags_7d_maxscore
+e2_tags_7d_avgscore
+e2_tags_14d_matchnum
+e2_tags_14d_maxscore
+e2_tags_14d_avgscore
+d1_feature_3h_ctr
+d1_feature_3h_ctcvr
+d1_feature_3h_cvr
+d1_feature_3h_conver
+d1_feature_3h_ecpm
+d1_feature_6h_ctr
+d1_feature_6h_ctcvr
+d1_feature_6h_cvr
+d1_feature_6h_conver
+d1_feature_6h_ecpm
+d1_feature_12h_ctr
+d1_feature_12h_ctcvr
+d1_feature_12h_cvr
+d1_feature_12h_conver
+d1_feature_12h_ecpm
+d1_feature_1d_ctr
+d1_feature_1d_ctcvr
+d1_feature_1d_cvr
+d1_feature_1d_conver
+d1_feature_1d_ecpm
+d1_feature_3d_ctr
+d1_feature_3d_ctcvr
+d1_feature_3d_cvr
+d1_feature_3d_conver
+d1_feature_3d_ecpm
+d1_feature_7d_ctr
+d1_feature_7d_ctcvr
+d1_feature_7d_cvr
+d1_feature_7d_conver
+d1_feature_7d_ecpm
+vid_rank_ctr_1d
+vid_rank_ctr_3d
+vid_rank_ctr_7d
+vid_rank_ctr_14d
+vid_rank_ctcvr_1d
+vid_rank_ctcvr_3d
+vid_rank_ctcvr_7d
+vid_rank_ctcvr_14d
+vid_rank_ecpm_1d
+vid_rank_ecpm_3d
+vid_rank_ecpm_7d
+vid_rank_ecpm_14d

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_32_bucket_20240622.scala

@@ -22,7 +22,7 @@ object makedata_ad_32_bucket_20240622 {
     val sc = spark.sparkContext
 
     val loader = getClass.getClassLoader
-    val resourceUrl = loader.getResource("20240622_ad_feature_name.txt")
+    val resourceUrl = loader.getResource("20240703_ad_feature_name.txt")
     val content =
       if (resourceUrl != null) {
         val content = Source.fromURL(resourceUrl).getLines().mkString("\n")

+ 5 - 4
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240622.scala

@@ -24,7 +24,7 @@ object makedata_ad_33_bucketData_20240622 {
 
     val loader = getClass.getClassLoader
 
-    val resourceUrlBucket = loader.getResource("20240622_ad_bucket_249.txt")
+    val resourceUrlBucket = loader.getResource("20240703_ad_bucket_351.txt")
     val buckets =
       if (resourceUrlBucket != null) {
         val buckets = Source.fromURL(resourceUrlBucket).getLines().mkString("\n")
@@ -50,8 +50,9 @@ object makedata_ad_33_bucketData_20240622 {
     val savePath = param.getOrElse("savePath", "/dw/recommend/model/33_ad_train_data/")
     val beginStr = param.getOrElse("beginStr", "20240620")
     val endStr = param.getOrElse("endStr", "20240620")
-    val repartition = param.getOrElse("repartition", "200").toInt
+    val repartition = param.getOrElse("repartition", "100").toInt
     val filterNames = param.getOrElse("filterNames", "").split(",").toSet
+    val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
 
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
     for (date <- dateRange) {
@@ -71,11 +72,11 @@ object makedata_ad_33_bucketData_20240622 {
           case (logKey, labelKey, features) =>
             val logKeyList = logKey.split(",")
             val apptype = logKeyList(0)
-            !Set("12").contains(apptype)
+            !Set("12", "13").contains(apptype)
         }
         .map{
           case (logKey, labelKey, features) =>
-            val label = JSON.parseObject(labelKey).getOrDefault("ad_is_conversion", "0").toString
+            val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString
             (label, features)
         }
         .mapPartitions(row => {

+ 9 - 7
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本-广告

@@ -6,8 +6,8 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 tablePart:64 repartition:16 \
-beginStr:2024062008 endStr:2024062223 \
-savePath:/dw/recommend/model/31_ad_sample_data_fix/ \
+beginStr:2024062008 endStr:2024062123 \
+savePath:/dw/recommend/model/31_ad_sample_data_v3/ \
 table:alg_recsys_ad_sample_all filterHours:00,01,02,03,04,05,06,07 \
 idDefaultValue:0.01 \
 > p31_2024062008.log 2>&1 &
@@ -20,9 +20,9 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --master yarn --driver-memory 16G --executor-memory 1G --executor-cores 1 --num-executors 16 \
 --conf spark.driver.maxResultSize=16G \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-fileName:20240620_100_fix \
-readPath:/dw/recommend/model/31_ad_sample_data_fix/20240620* \
-savePath:/dw/recommend/model/32_bucket_data/ \
+readPath:/dw/recommend/model/31_ad_sample_data_v3/2024062[01] \
+savePath:/dw/recommend/model/32_bucket_file/ \
+fileName:20240703_100_fix sampleRate:1.0 bucketNum:100 \
 > p32_data.log 2>&1 &
 
 
@@ -30,8 +30,10 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --class com.aliyun.odps.spark.examples.makedata_ad.makedata_ad_33_bucketData_20240622 \
 --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-beginStr:20240623 endStr:20240623 repartition:400 \
-filterNames:XXXXXXX \
+readPath:/dw/recommend/model/31_ad_sample_data_v3/ \
+savePath:/dw/recommend/model/33_ad_train_data_v3/ \
+beginStr:20240620 endStr:20240621 repartition:100 \
+filterNames:adid_,targeting_conversion_ \
 > p33_data.log 2>&1 &
 
 filterNames:cid_,adid_,adverid_,targeting_conversion_ \