
add score map

jch · 5 months ago · commit 8a756ba357

+ 9 - 8
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_61_nor_sample_20241209.scala

@@ -45,34 +45,35 @@ object makedata_recsys_61_nor_sample_20241209 {
           val rList = r.split("\t")
           val logKey = rList(0)
           val labelKey = rList(1)
-          val jsons = JSON.parseObject(rList(2))
+          val scoresMap = rList(2)
+          val jsons = JSON.parseObject(rList(3))
           val features = scala.collection.mutable.Map[String, Double]()
           jsons.foreach(r => {
             features.put(r._1, jsons.getDoubleValue(r._1))
           })
-          (logKey, labelKey, features)
+          (logKey, labelKey, scoresMap, features)
         })
         .filter {
-          case (logKey, labelKey, features) =>
+          case (logKey, labelKey, scoresMap, features) =>
             val logKeyList = logKey.split(",")
             val apptype = logKeyList(0)
             val pagesource = logKeyList(1)
             whatApps.contains(apptype) && pagesource.endsWith("recommend")
         }.filter {
-          case (logKey, labelKey, features) =>
+          case (logKey, labelKey, scoresMap, features) =>
             val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString.toDouble
             label > 0 || new Random().nextDouble() <= fuSampleRate
         }
         .map {
-          case (logKey, labelKey, features) =>
+          case (logKey, labelKey, scoresMap, features) =>
             val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString.toDouble
-            (logKey, label, features)
+            (logKey, label, scoresMap, features)
         }
         .mapPartitions(row => {
           val result = new ArrayBuffer[String]()
           val bucketsMap = bucketsMap_br.value
           row.foreach {
-            case (logKey, label, features) =>
+            case (logKey, label, scoresMap, features) =>
               val featuresBucket = features.map {
                 case (name, score) =>
                   if (!featureNameSet.contains(name)) {
@@ -91,7 +92,7 @@ object makedata_recsys_61_nor_sample_20241209 {
                     }
                   }
               }.filter(_.nonEmpty)
-              result.add(logKey + "\t" + label + "\t" + featuresBucket.mkString("\t"))
+              result.add(logKey + "\t" + label + "\t" + scoresMap + "\t" + featuresBucket.mkString("\t"))
           }
           result.iterator
         })

+ 9 - 8
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_61_rov_sample_20241209.scala

@@ -49,34 +49,35 @@ object makedata_recsys_61_rov_sample_20241209 {
           val rList = r.split("\t")
           val logKey = rList(0)
           val labelKey = rList(1)
-          val jsons = JSON.parseObject(rList(2))
+          val scoresMap = rList(2)
+          val jsons = JSON.parseObject(rList(3))
           val features = scala.collection.mutable.Map[String, Double]()
           jsons.foreach(r => {
             features.put(r._1, jsons.getDoubleValue(r._1))
           })
-          (logKey, labelKey, features)
+          (logKey, labelKey, scoresMap, features)
         })
         .filter {
-          case (logKey, labelKey, features) =>
+          case (logKey, labelKey, scoresMap, features) =>
             val logKeyList = logKey.split(",")
             val apptype = logKeyList(0)
             val pagesource = logKeyList(1)
             whatApps.contains(apptype) && pagesource.endsWith("recommend")
         }.filter {
-          case (logKey, labelKey, features) =>
+          case (logKey, labelKey, scoresMap, features) =>
             val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString
             "1".equals(label) || new Random().nextDouble() <= fuSampleRate
         }
         .map {
-          case (logKey, labelKey, features) =>
+          case (logKey, labelKey, scoresMap, features) =>
             val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString
-            (logKey, label, features)
+            (logKey, label, scoresMap, features)
         }
         .mapPartitions(row => {
           val result = new ArrayBuffer[String]()
           val bucketsMap = bucketsMap_br.value
           row.foreach {
-            case (logKey, label, features) =>
+            case (logKey, label, scoresMap, features) =>
               val featuresBucket = features.map {
                 case (name, score) =>
                   if (!featureNameSet.contains(name)) {
@@ -95,7 +96,7 @@ object makedata_recsys_61_rov_sample_20241209 {
                     }
                   }
               }.filter(_.nonEmpty)
-              result.add(logKey + "\t" + label + "\t" + featuresBucket.mkString("\t"))
+              result.add(logKey + "\t" + label + "\t" + scoresMap + "\t" + featuresBucket.mkString("\t"))
           }
           result.iterator
         })