Просмотр исходного кода

feat:添加对单个CID打分的脚本

zhaohaipeng 9 месяцев назад
Родитель
Коммит
f03d19cd47

+ 23 - 21
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240728.scala

@@ -74,6 +74,7 @@ object makedata_ad_33_bucketData_20240728 {
     val contentList = content.split("\n")
       .map(r => r.replace(" ", "").replaceAll("\n", ""))
       .filter(r => r.nonEmpty).toList
+    println(contentList.length)
 
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
     for (date <- dateRange) {
@@ -87,6 +88,13 @@ object makedata_ad_33_bucketData_20240728 {
           jsons.foreach(r => {
             features.put(r._1, jsons.getDoubleValue(r._1))
           })
+
+          for (name <- contentList) {
+            if (!features.contains(name)) {
+              features.put(name, 0)
+            }
+          }
+
           (logKey, labelKey, features)
         })
         .filter {
@@ -106,36 +114,30 @@ object makedata_ad_33_bucketData_20240728 {
           val bucketsMap = bucketsMap_br.value
           row.foreach {
             case (label, features) =>
-              var featuresBucket = ""
-              for (name <- contentList) {
-                var ifFilter = false
-                if (filterNames.nonEmpty) {
-                  filterNames.foreach(r => if (!ifFilter && name.contains(r)) {
-                    ifFilter = true
-                  })
-                }
-                if (ifFilter) {
-                  ""
-                } else {
-                  if (features.contains(name)) {
-                    val score = features(name)
+              val featuresBucket = features.map {
+                case (name, score) =>
+                  var ifFilter = false
+                  if (filterNames.nonEmpty) {
+                    filterNames.foreach(r => if (!ifFilter && name.contains(r)) {
+                      ifFilter = true
+                    })
+                  }
+                  if (ifFilter) {
+                    ""
+                  } else {
                     if (score > 1E-8) {
                       if (bucketsMap.contains(name)) {
                         val (bucketsNum, buckets) = bucketsMap(name)
                         val scoreNew = 0.01 + 1.0 / bucketsNum * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
-                        featuresBucket = featuresBucket + (name + ":" + scoreNew.toString)
+                        name + ":" + scoreNew.toString
                       } else {
-                        featuresBucket = featuresBucket + (name + ":" + score.toString)
+                        name + ":" + score.toString
                       }
                     } else {
-                      featuresBucket = featuresBucket + (name + ":" + "0.01")
+                      name + ":" + "0.01"
                     }
-
-                  } else {
-                    featuresBucket = featuresBucket + (name + ":" + "0.01")
                   }
-                }
-              }
+              }.filter(_.nonEmpty)
               result.add(label + "\t" + featuresBucket.mkString("\t"))
           }
           result.iterator