zhangbo 8 ヶ月 前
コミット
a4275787f3

+ 28 - 14
recommend-model-produce/src/main/scala/com/tzld/piaoquan/recommend/model/train_01_xgb_ad_20240808.scala

@@ -126,23 +126,37 @@ object train_01_xgb_ad_20240808{
 
   def createData4Ad(data: RDD[String], features: Array[String]): RDD[Row] = {
     data.map(r => {
-      val rList = r.split("\t")
-      val label = rList(0).toInt
-      val featureMap = scala.collection.mutable.Map[String, Double]()
-      var cid = -1
-      rList.drop(1).foreach(kv =>{
-        val kv_ = kv.split(":")
-        if (kv_(0).startsWith("cid_")){
-          cid = kv_(0).split("_")(1).toInt
-        }else{
-          featureMap.put(kv_(0), kv_(1).toDouble)
-        }
-      })
+//      val rList = r.split("\t")
+//      val label = rList(0).toInt
+//      val featureMap = scala.collection.mutable.Map[String, Double]()
+//      var cid = -1
+//      rList.drop(1).foreach(kv =>{
+//        val kv_ = kv.split(":")
+//        if (kv_(0).startsWith("cid_")){
+//          cid = kv_(0).split("_")(1).toInt
+//        }else{
+//          featureMap.put(kv_(0), kv_(1).toDouble)
+//        }
+//      })
+//      val v: Array[Any] = new Array[Any](features.length + 1)
+//      v(0) = label
+////      v(1) = cid
+//      for (i <- 0 until features.length) {
+//        v(i + 1) = featureMap.getOrElse(r, 0.0D)
+//      }
+//      Row(v: _*)
+val line: Array[String] = StringUtils.split(r, '\t')
+      val label: Int = NumberUtils.toInt(line(0))
+      val map: util.Map[String, Double] = new util.HashMap[String, Double]
+      for (i <- 1 until line.length) {
+        val fv: Array[String] = StringUtils.split(line(i), ':')
+        map.put(fv(0), NumberUtils.toDouble(fv(1), 0.0))
+      }
+
       val v: Array[Any] = new Array[Any](features.length + 1)
       v(0) = label
-//      v(1) = cid
       for (i <- 0 until features.length) {
-        v(i + 1) = featureMap.getOrElse(r, 0.0D)
+        v(i + 1) = map.getOrDefault(features(i), 0.0d)
       }
       Row(v: _*)
     })