Ver código fonte

兼容新增的字符串类型

xueyiming 2 meses atrás
pai
commit
dd5d9bf0c4

+ 13 - 8
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_31_originData_hive_20240718.scala

@@ -13,6 +13,7 @@ import scala.collection.JavaConversions._
 import scala.collection.mutable.ArrayBuffer
 import scala.io.Source
 import scala.language.postfixOps
+import scala.util.Try
 
 /*
    20240608 提取特征
@@ -486,9 +487,9 @@ object makedata_ad_31_originData_hive_20240718 {
           val labelKey = record.getOrElse("labelKey", "")
           val featureKey = record.getOrElse("featureKey", "")
           val jsons = JSON.parseObject(featureKey)
-          val features = scala.collection.mutable.Map[String, Double]()
+          val features = scala.collection.mutable.Map[String, String]()
           jsons.foreach(r => {
-            features.put(r._1, jsons.getDoubleValue(r._1))
+            features.put(r._1, jsons.getString(r._1))
           })
           (logKey, labelKey, features)
         }).filter {
@@ -502,18 +503,22 @@ object makedata_ad_31_originData_hive_20240718 {
             val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString
             val bucketsMap = bucketsMap_br.value
             var resultMap = features.collect {
-              case (name, score) if !filterNames.exists(name.contains) && score > 1E-8 =>
+              case (name, str) if !filterNames.exists(name.contains) =>
                 var key = name.replace("*", "_x_").replace("(view)", "_view")
                 if (key == "ad_is_click") {
                   key = "has_click"
                 }
-                val value = if (bucketsMap.contains(name)) {
-                  val (bucketsNum, buckets) = bucketsMap(name)
-                  1.0 / bucketsNum * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
+                if (Try(str.toDouble).isSuccess && str.toDouble > 1E-8) {
+                  val value = if (bucketsMap.contains(name)) {
+                    val (bucketsNum, buckets) = bucketsMap(name)
+                    1.0 / bucketsNum * (ExtractorUtils.findInsertPosition(buckets, str.toDouble).toDouble + 1.0)
+                  } else {
+                    str.toDouble
+                  }
+                  key -> value.toString
                 } else {
-                  score
+                  key -> str
                 }
-                key -> value.toString
             }.toMap
             resultMap += ("has_conversion" -> label)
             resultMap += ("logkey" -> logKey)