فهرست منبع

feat:添加对单个CID打分的脚本

zhaohaipeng 9 ماه پیش
والد
کامیت
8de7b0476a
1 فایلهای تغییر یافته به همراه 21 افزوده شده و 21 حذف شده
  1. 21 21
      src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240728.scala

+ 21 - 21
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240728.scala

@@ -99,12 +99,6 @@ object makedata_ad_33_bucketData_20240728 {
           case (logKey, labelKey, features) =>
             val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString
 
-            for (name <- contentList) {
-              if (!features.contains(name)) {
-                features.put(name, 0);
-              }
-            }
-
             (label, features)
         }
         .mapPartitions(row => {
@@ -112,30 +106,36 @@ object makedata_ad_33_bucketData_20240728 {
           val bucketsMap = bucketsMap_br.value
           row.foreach {
             case (label, features) =>
-              val featuresBucket = features.map {
-                case (name, score) =>
-                  var ifFilter = false
-                  if (filterNames.nonEmpty) {
-                    filterNames.foreach(r => if (!ifFilter && name.contains(r)) {
-                      ifFilter = true
-                    })
-                  }
-                  if (ifFilter) {
-                    ""
-                  } else {
+              var featuresBucket = ""
+              for (name <- contentList) {
+                var ifFilter = false
+                if (filterNames.nonEmpty) {
+                  filterNames.foreach(r => if (!ifFilter && name.contains(r)) {
+                    ifFilter = true
+                  })
+                }
+                if (ifFilter) {
+                  ""
+                } else {
+                  if (features.contains(name)) {
+                    val score = features(name)
                     if (score > 1E-8) {
                       if (bucketsMap.contains(name)) {
                         val (bucketsNum, buckets) = bucketsMap(name)
                         val scoreNew = 0.01 + 1.0 / bucketsNum * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
-                        name + ":" + scoreNew.toString
+                        featuresBucket = featuresBucket + (name + ":" + scoreNew.toString)
                       } else {
-                        name + ":" + score.toString
+                        featuresBucket = featuresBucket + (name + ":" + score.toString)
                       }
                     } else {
-                      name + ":" + "0.01"
+                      featuresBucket = featuresBucket + (name + ":" + "0.01")
                     }
+
+                  } else {
+                    featuresBucket = featuresBucket + (name + ":" + "0.01")
                   }
-              }.filter(_.nonEmpty)
+                }
+              }
               result.add(label + "\t" + featuresBucket.mkString("\t"))
           }
           result.iterator