|
@@ -126,23 +126,37 @@ object train_01_xgb_ad_20240808{
|
|
|
|
|
|
def createData4Ad(data: RDD[String], features: Array[String]): RDD[Row] = {
|
|
|
data.map(r => {
|
|
|
- val rList = r.split("\t")
|
|
|
- val label = rList(0).toInt
|
|
|
- val featureMap = scala.collection.mutable.Map[String, Double]()
|
|
|
- var cid = -1
|
|
|
- rList.drop(1).foreach(kv =>{
|
|
|
- val kv_ = kv.split(":")
|
|
|
- if (kv_(0).startsWith("cid_")){
|
|
|
- cid = kv_(0).split("_")(1).toInt
|
|
|
- }else{
|
|
|
- featureMap.put(kv_(0), kv_(1).toDouble)
|
|
|
- }
|
|
|
- })
|
|
|
+// val rList = r.split("\t")
|
|
|
+// val label = rList(0).toInt
|
|
|
+// val featureMap = scala.collection.mutable.Map[String, Double]()
|
|
|
+// var cid = -1
|
|
|
+// rList.drop(1).foreach(kv =>{
|
|
|
+// val kv_ = kv.split(":")
|
|
|
+// if (kv_(0).startsWith("cid_")){
|
|
|
+// cid = kv_(0).split("_")(1).toInt
|
|
|
+// }else{
|
|
|
+// featureMap.put(kv_(0), kv_(1).toDouble)
|
|
|
+// }
|
|
|
+// })
|
|
|
+// val v: Array[Any] = new Array[Any](features.length + 1)
|
|
|
+// v(0) = label
|
|
|
+//// v(1) = cid
|
|
|
+// for (i <- 0 until features.length) {
|
|
|
+// v(i + 1) = featureMap.getOrElse(r, 0.0D)
|
|
|
+// }
|
|
|
+// Row(v: _*)
|
|
|
+val line: Array[String] = StringUtils.split(r, '\t')
|
|
|
+ val label: Int = NumberUtils.toInt(line(0))
|
|
|
+ val map: util.Map[String, Double] = new util.HashMap[String, Double]
|
|
|
+ for (i <- 1 until line.length) {
|
|
|
+ val fv: Array[String] = StringUtils.split(line(i), ':')
|
|
|
+ map.put(fv(0), NumberUtils.toDouble(fv(1), 0.0))
|
|
|
+ }
|
|
|
+
|
|
|
val v: Array[Any] = new Array[Any](features.length + 1)
|
|
|
v(0) = label
|
|
|
-// v(1) = cid
|
|
|
for (i <- 0 until features.length) {
|
|
|
- v(i + 1) = featureMap.getOrElse(r, 0.0D)
|
|
|
+ v(i + 1) = map.getOrDefault(features(i), 0.0d)
|
|
|
}
|
|
|
Row(v: _*)
|
|
|
})
|