|  | @@ -61,11 +61,15 @@ object train_01_xgb_ad_20240808{
 | 
	
		
			
				|  |  |      )
 | 
	
		
			
				|  |  |      println("zhangbo:train data size:" + trainData.count())
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    val fields = Array(
 | 
	
		
			
				|  |  | +    var fields = Array(
 | 
	
		
			
				|  |  |        DataTypes.createStructField("label", DataTypes.IntegerType, true)
 | 
	
		
			
				|  |  |  //      DataTypes.createStructField("logKey", DataTypes.IntegerType, true)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      ) ++ features.map(f => DataTypes.createStructField(f, DataTypes.DoubleType, true))
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    fields = fields ++ Array(
 | 
	
		
			
				|  |  | +      DataTypes.createStructField("logKey", DataTypes.StringType, true)
 | 
	
		
			
				|  |  | +    )
 | 
	
		
			
				|  |  |      val schema = DataTypes.createStructType(fields)
 | 
	
		
			
				|  |  |      val trainDataSet: Dataset[Row] = spark.createDataFrame(trainData, schema)
 | 
	
		
			
				|  |  |      val vectorAssembler = new VectorAssembler().setInputCols(features).setOutputCol("features")
 | 
	
	
		
			
				|  | @@ -100,9 +104,9 @@ object train_01_xgb_ad_20240808{
 | 
	
		
			
				|  |  |      val testDataSetTrans = vectorAssembler.transform(testDataSet).select("features","label")
 | 
	
		
			
				|  |  |      val predictions = model.transform(testDataSetTrans)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    val saveData = predictions.select("label", "rawPrediction", "probability").rdd
 | 
	
		
			
				|  |  | +    val saveData = predictions.select("label", "rawPrediction", "probability", "logKey").rdd
 | 
	
		
			
				|  |  |        .map(r =>{
 | 
	
		
			
				|  |  | -        (r.get(0), r.get(1), r.get(2)).productIterator.mkString("\t")
 | 
	
		
			
				|  |  | +        (r.get(0), r.get(1), r.get(2), r.get(3)).productIterator.mkString("\t")
 | 
	
		
			
				|  |  |      })
 | 
	
		
			
				|  |  |      val hdfsPath = savePath
 | 
	
		
			
				|  |  |      if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
 | 
	
	
		
			
				|  | @@ -148,16 +152,21 @@ object train_01_xgb_ad_20240808{
 | 
	
		
			
				|  |  |  val line: Array[String] = StringUtils.split(r, '\t')
 | 
	
		
			
				|  |  |        val label: Int = NumberUtils.toInt(line(0))
 | 
	
		
			
				|  |  |        val map: util.Map[String, Double] = new util.HashMap[String, Double]
 | 
	
		
			
				|  |  | +      var cid = "-1"
 | 
	
		
			
				|  |  |        for (i <- 1 until line.length) {
 | 
	
		
			
				|  |  |          val fv: Array[String] = StringUtils.split(line(i), ':')
 | 
	
		
			
				|  |  |          map.put(fv(0), NumberUtils.toDouble(fv(1), 0.0))
 | 
	
		
			
				|  |  | +        if(fv(0).startsWith("cid_")){
 | 
	
		
			
				|  |  | +          cid = fv(0).split("_")(1)
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -      val v: Array[Any] = new Array[Any](features.length + 1)
 | 
	
		
			
				|  |  | +      val v: Array[Any] = new Array[Any](features.length + 2)
 | 
	
		
			
				|  |  |        v(0) = label
 | 
	
		
			
				|  |  |        for (i <- 0 until features.length) {
 | 
	
		
			
				|  |  |          v(i + 1) = map.getOrDefault(features(i), 0.0d)
 | 
	
		
			
				|  |  |        }
 | 
	
		
			
				|  |  | +      v(features.length + 1) = cid
 | 
	
		
			
				|  |  |        Row(v: _*)
 | 
	
		
			
				|  |  |      })
 | 
	
		
			
				|  |  |    }
 |