|
@@ -45,34 +45,35 @@ object makedata_recsys_61_nor_sample_20241209 {
|
|
|
val rList = r.split("\t")
|
|
|
val logKey = rList(0)
|
|
|
val labelKey = rList(1)
|
|
|
- val jsons = JSON.parseObject(rList(2))
|
|
|
+ val scoresMap = rList(2)
|
|
|
+ val jsons = JSON.parseObject(rList(3))
|
|
|
val features = scala.collection.mutable.Map[String, Double]()
|
|
|
jsons.foreach(r => {
|
|
|
features.put(r._1, jsons.getDoubleValue(r._1))
|
|
|
})
|
|
|
- (logKey, labelKey, features)
|
|
|
+ (logKey, labelKey, scoresMap, features)
|
|
|
})
|
|
|
.filter {
|
|
|
- case (logKey, labelKey, features) =>
|
|
|
+ case (logKey, labelKey, scoresMap, features) =>
|
|
|
val logKeyList = logKey.split(",")
|
|
|
val apptype = logKeyList(0)
|
|
|
val pagesource = logKeyList(1)
|
|
|
whatApps.contains(apptype) && pagesource.endsWith("recommend")
|
|
|
}.filter {
|
|
|
- case (logKey, labelKey, features) =>
|
|
|
+ case (logKey, labelKey, scoresMap, features) =>
|
|
|
val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString.toDouble
|
|
|
label > 0 || new Random().nextDouble() <= fuSampleRate
|
|
|
}
|
|
|
.map {
|
|
|
- case (logKey, labelKey, features) =>
|
|
|
+ case (logKey, labelKey, scoresMap, features) =>
|
|
|
val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString.toDouble
|
|
|
- (logKey, label, features)
|
|
|
+ (logKey, label, scoresMap, features)
|
|
|
}
|
|
|
.mapPartitions(row => {
|
|
|
val result = new ArrayBuffer[String]()
|
|
|
val bucketsMap = bucketsMap_br.value
|
|
|
row.foreach {
|
|
|
- case (logKey, label, features) =>
|
|
|
+ case (logKey, label, scoresMap, features) =>
|
|
|
val featuresBucket = features.map {
|
|
|
case (name, score) =>
|
|
|
if (!featureNameSet.contains(name)) {
|
|
@@ -91,7 +92,7 @@ object makedata_recsys_61_nor_sample_20241209 {
|
|
|
}
|
|
|
}
|
|
|
}.filter(_.nonEmpty)
|
|
|
- result.add(logKey + "\t" + label + "\t" + featuresBucket.mkString("\t"))
|
|
|
+ result.add(logKey + "\t" + label + "\t" + scoresMap + "\t" + featuresBucket.mkString("\t"))
|
|
|
}
|
|
|
result.iterator
|
|
|
})
|