Bladeren bron

feat:添加特征延迟验证脚本

zhaohaipeng 8 maanden geleden
bovenliggende
commit
b1fb79103f

+ 18 - 28
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/xgb/makedata_31_bucketDataPrint_20240821.scala

@@ -489,6 +489,17 @@ object makedata_31_bucketDataPrint_20240821 {
             key + ":" + value
         }.filter(_.nonEmpty).toList
 
+        // Offline features with every b9_* statistic removed, rendered as "key:value".
+        // The exclusion set is built once per record instead of once per feature key,
+        // and the result is materialised with .toList like the sibling feature lists.
+        val offlineFeatureV2 = {
+          val b9FeatureSet = Set("b9_1h_ctr", "b9_1h_ctcvr", "b9_1h_cvr", "b9_1h_conver", "b9_1h_click", "b9_1h_conver*log(view)", "b9_1h_conver*ctcvr", "b9_2h_ctr", "b9_2h_ctcvr", "b9_2h_cvr", "b9_2h_conver", "b9_2h_click", "b9_2h_conver*log(view)", "b9_2h_conver*ctcvr", "b9_3h_ctr", "b9_3h_ctcvr", "b9_3h_cvr", "b9_3h_conver", "b9_3h_click", "b9_3h_conver*log(view)", "b9_3h_conver*ctcvr", "b9_6h_ctr", "b9_6h_ctcvr", "b9_6h_cvr", "b9_6h_conver", "b9_6h_click", "b9_6h_conver*log(view)", "b9_6h_conver*ctcvr", "b9_12h_ctr", "b9_12h_ctcvr", "b9_12h_cvr", "b9_12h_conver", "b9_12h_click", "b9_12h_conver*log(view)", "b9_12h_conver*ctcvr", "b9_1d_ctr", "b9_1d_ctcvr", "b9_1d_cvr", "b9_1d_conver", "b9_1d_click", "b9_1d_conver*log(view)", "b9_1d_conver*ctcvr", "b9_3d_ctr", "b9_3d_ctcvr", "b9_3d_cvr", "b9_3d_conver", "b9_3d_click", "b9_3d_conver*log(view)", "b9_3d_conver*ctcvr", "b9_7d_ctr", "b9_7d_ctcvr", "b9_7d_cvr", "b9_7d_conver", "b9_7d_click", "b9_7d_conver*log(view)", "b9_7d_conver*ctcvr", "b9_yesterday_ctr", "b9_yesterday_ctcvr", "b9_yesterday_cvr", "b9_yesterday_conver", "b9_yesterday_click", "b9_yesterday_conver*log(view)", "b9_yesterday_conver*ctcvr", "b9_today_ctr", "b9_today_ctcvr", "b9_today_cvr", "b9_today_conver", "b9_today_click", "b9_today_conver*log(view)", "b9_today_conver*ctcvr")
+          offlineFeature.collect {
+            case (key, value) if !b9FeatureSet.contains(key) =>
+              key + ":" + value
+          }.toList
+        }
+
         val allFeatureV1 = allFeatureMap.map {
           case (key, value) =>
             key + ":" + value
@@ -504,27 +515,6 @@ object makedata_31_bucketDataPrint_20240821 {
             }
         }.filter(_.nonEmpty).toList
 
-        val cidKey = "cid_" + cid + ":0.1"
-        val allFeatureV3 = contentList.map {
-          case (name) =>
-            val b9FeatureSet = Set("b9_1h_ctr", "b9_1h_ctcvr", "b9_1h_cvr", "b9_1h_conver", "b9_1h_click", "b9_1h_conver*log(view)", "b9_1h_conver*ctcvr", "b9_2h_ctr", "b9_2h_ctcvr", "b9_2h_cvr", "b9_2h_conver", "b9_2h_click", "b9_2h_conver*log(view)", "b9_2h_conver*ctcvr", "b9_3h_ctr", "b9_3h_ctcvr", "b9_3h_cvr", "b9_3h_conver", "b9_3h_click", "b9_3h_conver*log(view)", "b9_3h_conver*ctcvr", "b9_6h_ctr", "b9_6h_ctcvr", "b9_6h_cvr", "b9_6h_conver", "b9_6h_click", "b9_6h_conver*log(view)", "b9_6h_conver*ctcvr", "b9_12h_ctr", "b9_12h_ctcvr", "b9_12h_cvr", "b9_12h_conver", "b9_12h_click", "b9_12h_conver*log(view)", "b9_12h_conver*ctcvr", "b9_1d_ctr", "b9_1d_ctcvr", "b9_1d_cvr", "b9_1d_conver", "b9_1d_click", "b9_1d_conver*log(view)", "b9_1d_conver*ctcvr", "b9_3d_ctr", "b9_3d_ctcvr", "b9_3d_cvr", "b9_3d_conver", "b9_3d_click", "b9_3d_conver*log(view)", "b9_3d_conver*ctcvr", "b9_7d_ctr", "b9_7d_ctcvr", "b9_7d_cvr", "b9_7d_conver", "b9_7d_click", "b9_7d_conver*log(view)", "b9_7d_conver*ctcvr", "b9_yesterday_ctr", "b9_yesterday_ctcvr", "b9_yesterday_cvr", "b9_yesterday_conver", "b9_yesterday_click", "b9_yesterday_conver*log(view)", "b9_yesterday_conver*ctcvr", "b9_today_ctr", "b9_today_ctcvr", "b9_today_cvr", "b9_today_conver", "b9_today_click", "b9_today_conver*log(view)", "b9_today_conver*ctcvr")
-            if (b9FeatureSet.contains(name)) {
-              if (offlineFeature.contains(name)) {
-                name + ":" + offlineFeature(name)
-              } else if (allFeatureMap.contains(name)) {
-                name + ":" + allFeatureMap(name)
-              } else {
-                ""
-              }
-            } else {
-              if (allFeatureMap.contains(name)) {
-                name + ":" + allFeatureMap(name)
-              } else {
-                ""
-              }
-            }
-        }.filter(_.nonEmpty) :+ cidKey
-
         val ctcvrFeature = offlineFeature.map {
           case (key, value) =>
             if (key.contains("ctcvr") || key.contains("Ctcvr")) {
@@ -534,7 +524,7 @@ object makedata_31_bucketDataPrint_20240821 {
             }
         }.filter(_.nonEmpty).toList
 
-        result.add((label, offlineFeatureList, allFeatureV1, allFeatureV2, ctcvrFeature, allFeatureV3))
+        result.add((label, offlineFeatureList, allFeatureV1, allFeatureV2, ctcvrFeature, offlineFeatureV2))
       })
 
       result.iterator
@@ -576,13 +566,13 @@ object makedata_31_bucketDataPrint_20240821 {
       println("路径不合法,无法写入:" + ctcvrFeature)
     }
 
-    val allFeatureV3 = "/dw/recommend/model/33_for_check_all_v3/" + readDate
-    if (allFeatureV3.nonEmpty && allFeatureV3.startsWith("/dw/recommend/model/")) {
-      println("删除路径并开始数据写入:" + allFeatureV3)
-      MyHdfsUtils.delete_hdfs_path(allFeatureV3)
-      data2.map(r => r._1 + "\t" + r._6.mkString("\t")).saveAsTextFile(allFeatureV3, classOf[GzipCodec])
+    val offlineFeatureV2 = "/dw/recommend/model/33_for_check_offline_v2/" + readDate
+    if (offlineFeatureV2.nonEmpty && offlineFeatureV2.startsWith("/dw/recommend/model/")) {
+      println("删除路径并开始数据写入:" + offlineFeatureV2)
+      MyHdfsUtils.delete_hdfs_path(offlineFeatureV2)
+      data2.map(r => r._1 + "\t" + r._6.mkString("\t")).saveAsTextFile(offlineFeatureV2, classOf[GzipCodec])
     } else {
-      println("路径不合法,无法写入:" + allFeatureV3)
+      println("路径不合法,无法写入:" + offlineFeatureV2)
     }
 
   }