Selaa lähdekoodia

feat:添加对单个CID打分的脚本

zhaohaipeng 9 kuukautta sitten
vanhempi
commit
f845d7872d

+ 25 - 23
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/makedata_ad_33_bucketData_20240728.scala

@@ -74,7 +74,6 @@ object makedata_ad_33_bucketData_20240728 {
     val contentList = content.split("\n")
       .map(r => r.replace(" ", "").replaceAll("\n", ""))
       .filter(r => r.nonEmpty).toList
-    println(contentList.length)
 
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
     for (date <- dateRange) {
@@ -88,13 +87,6 @@ object makedata_ad_33_bucketData_20240728 {
           jsons.foreach(r => {
             features.put(r._1, jsons.getDoubleValue(r._1))
           })
-
-          for (name <- contentList) {
-            if (!features.contains(name)) {
-              features.put(name, 0)
-            }
-          }
-
           (logKey, labelKey, features)
         })
         .filter {
@@ -114,32 +106,42 @@ object makedata_ad_33_bucketData_20240728 {
           val bucketsMap = bucketsMap_br.value
           row.foreach {
             case (label, features) =>
-              val featuresBucket = features.map {
-                case (name, score) =>
-                  var ifFilter = false
-                  if (filterNames.nonEmpty) {
-                    filterNames.foreach(r => if (!ifFilter && name.contains(r)) {
-                      ifFilter = true
-                    })
-                  }
-                  if (ifFilter) {
-                    ""
-                  } else {
+
+              val featuresBucket: List[String] = List()
+              for (name <- contentList) {
+                var ifFilter = false
+                if (filterNames.nonEmpty) {
+                  filterNames.foreach(r => if (!ifFilter && name.contains(r)) {
+                    ifFilter = true
+                  })
+                }
+                if (ifFilter) {
+                  ""
+                } else {
+                  if (features.contains(name)) {
+                    val score = features(name)
                     if (score > 1E-8) {
                       if (bucketsMap.contains(name)) {
                         val (bucketsNum, buckets) = bucketsMap(name)
                         val scoreNew = 0.01 + 1.0 / bucketsNum * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
-                        name + ":" + scoreNew.toString
+                        featuresBucket.add(name + ":" + scoreNew.toString)
                       } else {
-                        name + ":" + score.toString
+                        featuresBucket.add(name + ":" + score.toString)
                       }
                     } else {
-                      name + ":" + "0.01"
+                      featuresBucket.add(name + ":" + "0.01")
                     }
+
+                  } else {
+                    featuresBucket.add(name + ":" + "0.01")
                   }
-              }.filter(_.nonEmpty)
+                }
+
+              }
+
               result.add(label + "\t" + featuresBucket.mkString("\t"))
           }
+
           result.iterator
         })