
Ad model validation

zhangbo 4 months ago
parent
commit
9f51f552b2

+ 1 - 43
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_34_bucketDataPrint_20241217.scala

@@ -40,7 +40,7 @@ object makedata_ad_34_bucketDataPrint_20241217 {
 
 
     val loader = getClass.getClassLoader
-    val resourceUrl = loader.getResource("20240718_ad_feature_name.txt")
+    val resourceUrl = loader.getResource("20240703_ad_feature_name.txt")
     val content =
       if (resourceUrl != null) {
         val content = Source.fromURL(resourceUrl).getLines().mkString("\n")
@@ -89,35 +89,10 @@ object makedata_ad_34_bucketDataPrint_20241217 {
         partition = partition,
         transfer = func,
         numPartition = tablePart)
-//        .filter(record =>{
-//          val flag1 = record.isNull("metafeaturemap")
-//          val flag2 = record.isNull("extend")
-//          if (flag1 || flag2){
-//            false
-//          }else{
-//            val apptype = record.getString("apptype")
-//            val extend = record.getString("extend")
-//            val abcode = JSON.parseObject(extend).getString("abcode")
-//            val scoreMap = record.getString("scoremap")
-//            val ctcvr = JSON.parseObject(scoreMap).getString("ctcvrScore").toDouble
-//            if (
-//              apptype.equals("4")
-//                && Set("ab0", "ab1", "ab2", "ab3", "ab4").contains(abcode)
-//            ) {
-//              true
-//            } else {
-//              false
-//            }
-//          }
-//        })
         .map(record => {
-
           val ts = record.getString("ts").toInt
           val cid = record.getString("cid")
-
-
           val featureMap = new JSONObject()
-
           val b1: JSONObject = if (record.isNull("b1_feature")) new JSONObject() else
             JSON.parseObject(record.getString("b1_feature"))
           val b2: JSONObject = if (record.isNull("b2_feature")) new JSONObject() else
@@ -136,8 +111,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
             JSON.parseObject(record.getString("b8_feature"))
           val b9: JSONObject = if (record.isNull("b9_feature")) new JSONObject() else
             JSON.parseObject(record.getString("b9_feature"))
-
-
           featureMap.put("cid_" + cid, idDefaultValue)
           // if (b1.containsKey("adid") && b1.getString("adid").nonEmpty) {
           //   featureMap.put("adid_" + b1.getString("adid"), idDefaultValue)
@@ -150,14 +123,11 @@ object makedata_ad_34_bucketDataPrint_20241217 {
           // }
           val hour = DateTimeUtil.getHourByTimestamp(ts)
           featureMap.put("hour_" + hour, 0.1)
-
           val dayOfWeek = DateTimeUtil.getDayOrWeekByTimestamp(ts)
           featureMap.put("dayofweek_" + dayOfWeek, 0.1);
-
           if (b1.containsKey("cpa")) {
             featureMap.put("cpa", b1.getString("cpa").toDouble)
           }
-
           for ((bn, prefix1) <- List(
             (b2, "b2"), (b3, "b3"), (b4, "b4"), (b5, "b5"), (b8, "b8"), (b9, "b9")
           )) {
@@ -184,7 +154,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
               featureMap.put(prefix1 + "_" + prefix2 + "_" + "conver*ctcvr", conver * f2)
             }
           }
-
           for ((bn, prefix1) <- List(
             (b6, "b6"), (b7, "b7")
           )) {
@@ -211,10 +180,8 @@ object makedata_ad_34_bucketDataPrint_20241217 {
               featureMap.put(prefix1 + "_" + prefix2 + "_" + "conver*ctcvr", conver * f2)
             }
           }
-
           val c1: JSONObject = if (record.isNull("c1_feature")) new JSONObject() else
             JSON.parseObject(record.getString("c1_feature"))
-
           val midActionList = if (c1.containsKey("action") && c1.getString("action").nonEmpty) {
             c1.getString("action").split(",").map(r => {
               val rList = r.split(":")
@@ -236,7 +203,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
           featureMap.put("ctcvr_all", RankExtractorFeature_20240530.calDiv(converAll, viewAll))
           featureMap.put("cvr_all", RankExtractorFeature_20240530.calDiv(clickAll, converAll))
           // featureMap.put("ecpm_all", RankExtractorFeature_20240530.calDiv(incomeAll * 1000, viewAll))
-
          // ui features
           val midTimeDiff = scala.collection.mutable.Map[String, Double]()
           midActionList.foreach {
@@ -251,7 +217,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
                 midTimeDiff.put("timediff_conver_" + cid, 1.0 / ((ts - ts_history).toDouble / 3600.0 / 24.0))
               }
           }
-
           val midActionStatic = scala.collection.mutable.Map[String, Double]()
           midActionList.foreach {
             case (cid, (ts_history, click, conver, income, title)) =>
@@ -260,7 +225,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
               midActionStatic.put("actionstatic_conver_" + cid, conver + midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0))
               midActionStatic.put("actionstatic_income_" + cid, income + midActionStatic.getOrDefault("actionstatic_income_" + cid, 0.0))
           }
-
           if (midTimeDiff.contains("timediff_view_" + cid)) {
             featureMap.put("timediff_view", midTimeDiff.getOrDefault("timediff_view_" + cid, 0.0))
           }
@@ -300,7 +264,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
               midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0)
             ))
           }
-
           val e1: JSONObject = if (record.isNull("e1_feature")) new JSONObject() else
             JSON.parseObject(record.getString("e1_feature"))
           val e2: JSONObject = if (record.isNull("e2_feature")) new JSONObject() else
@@ -319,14 +282,12 @@ object makedata_ad_34_bucketDataPrint_20241217 {
               }
             }
           }
-
           val d1: JSONObject = if (record.isNull("d1_feature")) new JSONObject() else
             JSON.parseObject(record.getString("d1_feature"))
           val d2: JSONObject = if (record.isNull("d2_feature")) new JSONObject() else
             JSON.parseObject(record.getString("d2_feature"))
           val d3: JSONObject = if (record.isNull("d3_feature")) new JSONObject() else
             JSON.parseObject(record.getString("d3_feature"))
-
           if (d1.nonEmpty) {
             for (prefix <- List("3h", "6h", "12h", "1d", "3d", "7d")) {
               val view = if (!d1.containsKey("ad_view_" + prefix)) 0D else d1.getIntValue("ad_view_" + prefix).toDouble
@@ -345,7 +306,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
               // featureMap.put("d1_feature" + "_" + prefix + "_" + "ecpm", f5)
             }
           }
-
           val vidRankMaps = scala.collection.mutable.Map[String, scala.collection.immutable.Map[String, Double]]()
           if (d2.nonEmpty) {
             d2.foreach(r => {
@@ -373,9 +333,7 @@ object makedata_ad_34_bucketDataPrint_20241217 {
             val score = Similarity.conceptSimilarity(title, vTitle)
             featureMap.put("ctitle_vtitle_similarity", score);
           }
-
           val flag = record.isNull("metafeaturemap")
-
           val allfeaturemap = if (record.isNull("allfeaturemap")) new JSONObject() else
             JSON.parseObject(record.getString("allfeaturemap"))
           val apptype = record.getString("apptype")
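
For reference, a minimal Scala sketch of the classpath-resource loading pattern touched in the first hunk (the commit only renames the resource to 20240703_ad_feature_name.txt). The helper name and the empty-list fallback below are assumptions for illustration, not part of the job:

import scala.io.Source

// Hypothetical helper; the real job reads the resource inline inside the object.
object FeatureNameLoader {
  // Read the bucketed feature-name list shipped on the classpath, one name per line.
  def loadFeatureNames(resourceName: String = "20240703_ad_feature_name.txt"): Seq[String] = {
    val url = getClass.getClassLoader.getResource(resourceName)
    if (url == null) {
      Seq.empty // resource not found on the classpath: fall back to an empty list (assumption)
    } else {
      val source = Source.fromURL(url)
      try source.getLines().map(_.trim).filter(_.nonEmpty).toList
      finally source.close()
    }
  }
}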

+ 6 - 6
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本-广告

@@ -48,14 +48,14 @@ filterNames:"XXXXXX,adid_,targeting_conversion_,b2_3h_click,b2_3h_conver*log(vie
 
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
---class com.aliyun.odps.spark.examples.makedata_ad.makedata_ad_33_bucketDataPrint_20240628 \
+--class com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_34_bucketDataPrint_20241217 \
 --master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-beginStr:2024062908 endStr:2024062923 \
-readDate:20240629 \
-table:alg_recsys_ad_sample_all_new \
-savePath:/dw/recommend/model/33_for_check/ \
-> p33_data_check.log 2>&1 &
+beginStr:2024121708 endStr:2024121709 \
+readDate:20241217 \
+table:alg_recsys_ad_sample_all \
+savePath:/dw/recommend/model/34_for_check/ \
+> p34_data_check.log 2>&1 &
 
 
 /dw/recommend/model/33_for_check_v1/
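
For completeness, a minimal sketch of how the colon-separated key:value arguments above (beginStr, endStr, readDate, table, savePath) could be parsed on the driver side. The actual argument handling in makedata_ad_34_bucketDataPrint_20241217 is not shown in this commit, so the parser below is an assumption:

// Hypothetical parser for the "key:value" program arguments passed by the spark-submit line above.
object ArgSketch {
  def parse(args: Array[String]): Map[String, String] =
    args.flatMap { arg =>
      arg.split(":", 2) match {
        case Array(k, v) if k.nonEmpty => Some(k -> v)
        case _                         => None // ignore malformed arguments
      }
    }.toMap

  def main(args: Array[String]): Unit = {
    val params   = parse(args)
    val beginStr = params.getOrElse("beginStr", "2024121708")
    val savePath = params.getOrElse("savePath", "/dw/recommend/model/34_for_check/")
    println(s"beginStr=$beginStr savePath=$savePath")
  }
}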