|
@@ -13,6 +13,7 @@ import scala.collection.JavaConversions._
|
|
import scala.collection.mutable.ArrayBuffer
|
|
import scala.collection.mutable.ArrayBuffer
|
|
import scala.io.Source
|
|
import scala.io.Source
|
|
import scala.language.postfixOps
|
|
import scala.language.postfixOps
|
|
|
|
+import scala.util.Try
|
|
|
|
|
|
/*
|
|
/*
|
|
20240608 提取特征
|
|
20240608 提取特征
|
|
@@ -486,9 +487,9 @@ object makedata_ad_31_originData_hive_20240718 {
|
|
val labelKey = record.getOrElse("labelKey", "")
|
|
val labelKey = record.getOrElse("labelKey", "")
|
|
val featureKey = record.getOrElse("featureKey", "")
|
|
val featureKey = record.getOrElse("featureKey", "")
|
|
val jsons = JSON.parseObject(featureKey)
|
|
val jsons = JSON.parseObject(featureKey)
|
|
- val features = scala.collection.mutable.Map[String, Double]()
|
|
|
|
|
|
+ val features = scala.collection.mutable.Map[String, String]()
|
|
jsons.foreach(r => {
|
|
jsons.foreach(r => {
|
|
- features.put(r._1, jsons.getDoubleValue(r._1))
|
|
|
|
|
|
+ features.put(r._1, jsons.getString(r._1))
|
|
})
|
|
})
|
|
(logKey, labelKey, features)
|
|
(logKey, labelKey, features)
|
|
}).filter {
|
|
}).filter {
|
|
@@ -502,18 +503,22 @@ object makedata_ad_31_originData_hive_20240718 {
|
|
val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString
|
|
val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString
|
|
val bucketsMap = bucketsMap_br.value
|
|
val bucketsMap = bucketsMap_br.value
|
|
var resultMap = features.collect {
|
|
var resultMap = features.collect {
|
|
- case (name, score) if !filterNames.exists(name.contains) && score > 1E-8 =>
|
|
|
|
|
|
+ case (name, str) if !filterNames.exists(name.contains) =>
|
|
var key = name.replace("*", "_x_").replace("(view)", "_view")
|
|
var key = name.replace("*", "_x_").replace("(view)", "_view")
|
|
if (key == "ad_is_click") {
|
|
if (key == "ad_is_click") {
|
|
key = "has_click"
|
|
key = "has_click"
|
|
}
|
|
}
|
|
- val value = if (bucketsMap.contains(name)) {
|
|
|
|
- val (bucketsNum, buckets) = bucketsMap(name)
|
|
|
|
- 1.0 / bucketsNum * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
|
|
|
|
|
|
+ if (Try(str.toDouble).isSuccess && str.toDouble > 1E-8) {
|
|
|
|
+ val value = if (bucketsMap.contains(name)) {
|
|
|
|
+ val (bucketsNum, buckets) = bucketsMap(name)
|
|
|
|
+ 1.0 / bucketsNum * (ExtractorUtils.findInsertPosition(buckets, str.toDouble).toDouble + 1.0)
|
|
|
|
+ } else {
|
|
|
|
+ str.toDouble
|
|
|
|
+ }
|
|
|
|
+ key -> value.toString
|
|
} else {
|
|
} else {
|
|
- score
|
|
|
|
|
|
+ key -> str
|
|
}
|
|
}
|
|
- key -> value.toString
|
|
|
|
}.toMap
|
|
}.toMap
|
|
resultMap += ("has_conversion" -> label)
|
|
resultMap += ("has_conversion" -> label)
|
|
resultMap += ("logkey" -> logKey)
|
|
resultMap += ("logkey" -> logKey)
|