|
@@ -93,6 +93,10 @@ object pred_01_xgb_ad_hdfsfile_20240813{
|
|
|
val auc = evaluator.evaluate(predictions.select("label", "probability"))
|
|
|
println("zhangbo:auc:" + auc)
|
|
|
|
|
|
+ println("---------------------------------\n")
|
|
|
+ println("----------zhangbo-------------\n")
|
|
|
+ println("---------------------------------\n")
|
|
|
+
|
|
|
// Compute score statistics grouped by cid
|
|
|
sc.textFile(hdfsPath).map(r => {
|
|
|
val rList = r.split("\t")
|
|
@@ -108,7 +112,35 @@ object pred_01_xgb_ad_hdfsfile_20240813{
|
|
|
(cid, all, zheng, scores, zheng / all, scores / all)
|
|
|
}.collect().sortBy(-_._2).map(_.productIterator.mkString("\t")).foreach(println)
|
|
|
|
|
|
+ println("---------------------------------\n")
|
|
|
+ println("----------zhangbo-------------\n")
|
|
|
+ println("---------------------------------\n")
|
|
|
|
|
|
+ sc.textFile(hdfsPath).map(r => {
|
|
|
+ val rList = r.split("\t")
|
|
|
+ val cid_hour_apptype_abcode = rList(3).split("_")
|
|
|
+ val cid = cid_hour_apptype_abcode(0)
|
|
|
+ val hour = cid_hour_apptype_abcode(1)
|
|
|
+ val apptype = cid_hour_apptype_abcode(2)
|
|
|
+ var abcode = cid_hour_apptype_abcode(3)
|
|
|
+ if (Set("ab0", "ab1", "ab2", "ab3", "ab4", "ab5", "ab6", "ab7").contains(abcode)){
|
|
|
+ abcode = "实验组"
|
|
|
+ }else{
|
|
|
+ abcode = "基线组"
|
|
|
+ }
|
|
|
+
|
|
|
+ val score = rList(2).replace("[", "").replace("]", "")
|
|
|
+ .split(",")(1).toDouble
|
|
|
+ val label = rList(0).toDouble
|
|
|
+ ((cid, apptype, abcode), (1, label, score))
|
|
|
+ }).reduceByKey {
|
|
|
+ case (a, b) => (a._1 + b._1, a._2 + b._2, a._3 + b._3)
|
|
|
+ }.map {
|
|
|
+ case ((cid, apptype, abcode), (all, zheng, scores)) =>
|
|
|
+ (cid, apptype, abcode, all, zheng, scores, zheng / all, scores / all)
|
|
|
+ }.collect().sortBy {
|
|
|
+ case (cid, apptype, abcode, all, _, _, _, _) => (cid, apptype, abcode, -all)
|
|
|
+ }.map(_.productIterator.mkString("\t")).foreach(println)
|
|
|
|
|
|
}
|
|
|
|
|
@@ -121,12 +153,24 @@ object pred_01_xgb_ad_hdfsfile_20240813{
|
|
|
val label: Int = NumberUtils.toInt(line(0))
|
|
|
val map: util.Map[String, Double] = new util.HashMap[String, Double]
|
|
|
var cid = "-1"
|
|
|
+ var hour = "-1"
|
|
|
+ var apptype = "-1"
|
|
|
+ var abcode = "-1"
|
|
|
for (i <- 1 until line.length) {
|
|
|
val fv: Array[String] = StringUtils.split(line(i), ':')
|
|
|
map.put(fv(0), NumberUtils.toDouble(fv(1), 0.0))
|
|
|
if(fv(0).startsWith("cid_")){
|
|
|
cid = fv(0).split("_")(1)
|
|
|
}
|
|
|
+ if (fv(0).startsWith("hour_")) {
|
|
|
+ hour = fv(0).split("_")(1)
|
|
|
+ }
|
|
|
+ if (fv(0).startsWith("apptype_")) {
|
|
|
+ apptype = fv(0).split("_")(1)
|
|
|
+ }
|
|
|
+ if (fv(0).startsWith("abcode_")) {
|
|
|
+ abcode = fv(0).split("_")(1)
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
val v: Array[Any] = new Array[Any](features.length + 2)
|
|
@@ -134,7 +178,7 @@ object pred_01_xgb_ad_hdfsfile_20240813{
|
|
|
for (i <- 0 until features.length) {
|
|
|
v(i + 1) = map.getOrDefault(features(i), 0.0d)
|
|
|
}
|
|
|
- v(features.length + 1) = cid
|
|
|
+ v(features.length + 1) = (cid, hour, apptype, abcode).productIterator.mkString("_")
|
|
|
Row(v: _*)
|
|
|
})
|
|
|
}
|