Jelajahi Sumber

feat:添加特征延迟验证脚本

zhaohaipeng 8 bulan lalu
induk
melakukan
16f7dafbea

+ 17 - 4
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/xgb/makedata_31_bucketDataPrint_20240821.scala

@@ -421,15 +421,21 @@ object makedata_31_bucketDataPrint_20240821 {
             val labelKey = labels.toString()
             val label = record.getString("ad_is_conversion")
             //6 拼接数据,保存。
-            (apptype, mid, cid, ts, headvideoid, label, allfeature, featureMap, !allfeature.containsKey("weight_sum"))
+            (apptype, mid, cid, ts, headvideoid, label, allfeature, featureMap)
           }).filter {
-            case (apptype, mid, cid, ts, headvideoid, label, allfeature, featureMap, flag) =>
-              flag
+            case (apptype, mid, cid, ts, headvideoid, label, allfeature, featureMap) =>
+              if (allfeature.isEmpty) {
+                return false
+              } else if (allfeature.containsKey("weight_sum") || allfeature.contains("weight")) {
+                return false
+              }
+
+              return true
           }.mapPartitions(row => {
             val result = new ArrayBuffer[String]()
             val bucketsMap = bucketsMap_br.value
             row.foreach {
-              case (apptype, mid, cid, ts, headvideoid, label, allfeature, featureMap, flag) =>
+              case (apptype, mid, cid, ts, headvideoid, label, allfeature, featureMap) =>
                 val offlineFeatureMap = featureMap.map(r => {
                   val score = r._2.toString.toDouble
                   val name = r._1
@@ -471,9 +477,16 @@ object makedata_31_bucketDataPrint_20240821 {
       // 680实验,517个特征
       row.foreach(r => {
         val rList = r.split("\t")
+        val cid = rList(2).toString
         val label = rList(5).toString
         val allFeatureMap = JSON.parseObject(rList(6)).toMap.map(r => (r._1, r._2.toString))
         val offlineFeature = rList(7).split(",").map(r => (r.split(":")(0), r.split(":")(1))).toMap
+        if (!allFeatureMap.containsKey("cid_" + cid)) {
+          allFeatureMap.put("cid_" + cid, "0.1");
+        }
+        if (!offlineFeature.containsKey("cid_" + cid)) {
+          offlineFeature.containsKey("cid_" + cid);
+        }
         val offlineFeatureList = offlineFeature.map {
           case (key, value) =>
             key + ":" + value