|
@@ -421,15 +421,21 @@ object makedata_31_bucketDataPrint_20240821 {
|
|
|
val labelKey = labels.toString()
|
|
|
val label = record.getString("ad_is_conversion")
|
|
|
//6 拼接数据,保存。
|
|
|
- (apptype, mid, cid, ts, headvideoid, label, allfeature, featureMap, !allfeature.containsKey("weight_sum"))
|
|
|
+ (apptype, mid, cid, ts, headvideoid, label, allfeature, featureMap)
|
|
|
}).filter {
|
|
|
- case (apptype, mid, cid, ts, headvideoid, label, allfeature, featureMap, flag) =>
|
|
|
- flag
|
|
|
+ case (apptype, mid, cid, ts, headvideoid, label, allfeature, featureMap) =>
|
|
|
+ if (allfeature.isEmpty) {
|
|
|
+ return false
|
|
|
+ } else if (allfeature.containsKey("weight_sum") || allfeature.contains("weight")) {
|
|
|
+ return false
|
|
|
+ }
|
|
|
+
|
|
|
+ return true
|
|
|
}.mapPartitions(row => {
|
|
|
val result = new ArrayBuffer[String]()
|
|
|
val bucketsMap = bucketsMap_br.value
|
|
|
row.foreach {
|
|
|
- case (apptype, mid, cid, ts, headvideoid, label, allfeature, featureMap, flag) =>
|
|
|
+ case (apptype, mid, cid, ts, headvideoid, label, allfeature, featureMap) =>
|
|
|
val offlineFeatureMap = featureMap.map(r => {
|
|
|
val score = r._2.toString.toDouble
|
|
|
val name = r._1
|
|
@@ -471,9 +477,16 @@ object makedata_31_bucketDataPrint_20240821 {
|
|
|
// 680实验,517个特征
|
|
|
row.foreach(r => {
|
|
|
val rList = r.split("\t")
|
|
|
+ val cid = rList(2).toString
|
|
|
val label = rList(5).toString
|
|
|
val allFeatureMap = JSON.parseObject(rList(6)).toMap.map(r => (r._1, r._2.toString))
|
|
|
val offlineFeature = rList(7).split(",").map(r => (r.split(":")(0), r.split(":")(1))).toMap
|
|
|
+ if (!allFeatureMap.containsKey("cid_" + cid)) {
|
|
|
+ allFeatureMap.put("cid_" + cid, "0.1");
|
|
|
+ }
|
|
|
+ if (!offlineFeature.containsKey("cid_" + cid)) {
|
|
|
+ offlineFeature.containsKey("cid_" + cid);
|
|
|
+ }
|
|
|
val offlineFeatureList = offlineFeature.map {
|
|
|
case (key, value) =>
|
|
|
key + ":" + value
|