|
@@ -84,14 +84,12 @@ object makedata_ad_33_bucketDataToHive_20250110 {
|
|
}
|
|
}
|
|
.map {
|
|
.map {
|
|
case (logKey, labelKey, features) =>
|
|
case (logKey, labelKey, features) =>
|
|
- val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString
|
|
|
|
|
|
+ val labelObject = JSON.parseObject(labelKey)
|
|
|
|
+ val label = labelObject.getOrDefault(whatLabel, "0").toString
|
|
val bucketsMap = bucketsMap_br.value
|
|
val bucketsMap = bucketsMap_br.value
|
|
var resultMap = features.collect {
|
|
var resultMap = features.collect {
|
|
case (name, score) if !filterNames.exists(name.contains) && score > 1E-8 =>
|
|
case (name, score) if !filterNames.exists(name.contains) && score > 1E-8 =>
|
|
var key = name.replace("*", "_x_").replace("(view)", "_view")
|
|
var key = name.replace("*", "_x_").replace("(view)", "_view")
|
|
- if (key == "ad_is_click") {
|
|
|
|
- key = "has_click"
|
|
|
|
- }
|
|
|
|
val value = if (bucketsMap.contains(name)) {
|
|
val value = if (bucketsMap.contains(name)) {
|
|
val (bucketsNum, buckets) = bucketsMap(name)
|
|
val (bucketsNum, buckets) = bucketsMap(name)
|
|
1.0 / bucketsNum * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
|
|
1.0 / bucketsNum * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
|
|
@@ -100,7 +98,8 @@ object makedata_ad_33_bucketDataToHive_20250110 {
|
|
}
|
|
}
|
|
key -> value.toString
|
|
key -> value.toString
|
|
}.toMap
|
|
}.toMap
|
|
- resultMap += ("has_conversion" -> label)
|
|
|
|
|
|
+ resultMap += ("has_click" -> labelObject.getString("ad_is_click"))
|
|
|
|
+ resultMap += ("has_conversion" -> labelObject.getString("ad_is_conversion"))
|
|
resultMap += ("logkey" -> logKey)
|
|
resultMap += ("logkey" -> logKey)
|
|
(label.toInt, resultMap, Random.nextDouble())
|
|
(label.toInt, resultMap, Random.nextDouble())
|
|
}.filter(r => r._3 < negSampleRate || r._1 > 0)
|
|
}.filter(r => r._3 < negSampleRate || r._1 > 0)
|