|
@@ -40,7 +40,7 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
|
|
|
|
|
|
val loader = getClass.getClassLoader
|
|
|
- val resourceUrl = loader.getResource("20240718_ad_feature_name.txt")
|
|
|
+ val resourceUrl = loader.getResource("20240703_ad_feature_name.txt")
|
|
|
val content =
|
|
|
if (resourceUrl != null) {
|
|
|
val content = Source.fromURL(resourceUrl).getLines().mkString("\n")
|
|
@@ -89,35 +89,10 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
partition = partition,
|
|
|
transfer = func,
|
|
|
numPartition = tablePart)
|
|
|
-// .filter(record =>{
|
|
|
-// val flag1 = record.isNull("metafeaturemap")
|
|
|
-// val flag2 = record.isNull("extend")
|
|
|
-// if (flag1 || flag2){
|
|
|
-// false
|
|
|
-// }else{
|
|
|
-// val apptype = record.getString("apptype")
|
|
|
-// val extend = record.getString("extend")
|
|
|
-// val abcode = JSON.parseObject(extend).getString("abcode")
|
|
|
-// val scoreMap = record.getString("scoremap")
|
|
|
-// val ctcvr = JSON.parseObject(scoreMap).getString("ctcvrScore").toDouble
|
|
|
-// if (
|
|
|
-// apptype.equals("4")
|
|
|
-// && Set("ab0", "ab1", "ab2", "ab3", "ab4").contains(abcode)
|
|
|
-// ) {
|
|
|
-// true
|
|
|
-// } else {
|
|
|
-// false
|
|
|
-// }
|
|
|
-// }
|
|
|
-// })
|
|
|
.map(record => {
|
|
|
-
|
|
|
val ts = record.getString("ts").toInt
|
|
|
val cid = record.getString("cid")
|
|
|
-
|
|
|
-
|
|
|
val featureMap = new JSONObject()
|
|
|
-
|
|
|
val b1: JSONObject = if (record.isNull("b1_feature")) new JSONObject() else
|
|
|
JSON.parseObject(record.getString("b1_feature"))
|
|
|
val b2: JSONObject = if (record.isNull("b2_feature")) new JSONObject() else
|
|
@@ -136,8 +111,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
JSON.parseObject(record.getString("b8_feature"))
|
|
|
val b9: JSONObject = if (record.isNull("b9_feature")) new JSONObject() else
|
|
|
JSON.parseObject(record.getString("b9_feature"))
|
|
|
-
|
|
|
-
|
|
|
featureMap.put("cid_" + cid, idDefaultValue)
|
|
|
// if (b1.containsKey("adid") && b1.getString("adid").nonEmpty) {
|
|
|
// featureMap.put("adid_" + b1.getString("adid"), idDefaultValue)
|
|
@@ -150,14 +123,11 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
// }
|
|
|
val hour = DateTimeUtil.getHourByTimestamp(ts)
|
|
|
featureMap.put("hour_" + hour, 0.1)
|
|
|
-
|
|
|
val dayOfWeek = DateTimeUtil.getDayOrWeekByTimestamp(ts)
|
|
|
featureMap.put("dayofweek_" + dayOfWeek, 0.1);
|
|
|
-
|
|
|
if (b1.containsKey("cpa")) {
|
|
|
featureMap.put("cpa", b1.getString("cpa").toDouble)
|
|
|
}
|
|
|
-
|
|
|
for ((bn, prefix1) <- List(
|
|
|
(b2, "b2"), (b3, "b3"), (b4, "b4"), (b5, "b5"), (b8, "b8"), (b9, "b9")
|
|
|
)) {
|
|
@@ -184,7 +154,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "conver*ctcvr", conver * f2)
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
for ((bn, prefix1) <- List(
|
|
|
(b6, "b6"), (b7, "b7")
|
|
|
)) {
|
|
@@ -211,10 +180,8 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "conver*ctcvr", conver * f2)
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
val c1: JSONObject = if (record.isNull("c1_feature")) new JSONObject() else
|
|
|
JSON.parseObject(record.getString("c1_feature"))
|
|
|
-
|
|
|
val midActionList = if (c1.containsKey("action") && c1.getString("action").nonEmpty) {
|
|
|
c1.getString("action").split(",").map(r => {
|
|
|
val rList = r.split(":")
|
|
@@ -236,7 +203,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
featureMap.put("ctcvr_all", RankExtractorFeature_20240530.calDiv(converAll, viewAll))
|
|
|
featureMap.put("cvr_all", RankExtractorFeature_20240530.calDiv(clickAll, converAll))
|
|
|
// featureMap.put("ecpm_all", RankExtractorFeature_20240530.calDiv(incomeAll * 1000, viewAll))
|
|
|
-
|
|
|
// ui特征
|
|
|
val midTimeDiff = scala.collection.mutable.Map[String, Double]()
|
|
|
midActionList.foreach {
|
|
@@ -251,7 +217,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
midTimeDiff.put("timediff_conver_" + cid, 1.0 / ((ts - ts_history).toDouble / 3600.0 / 24.0))
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
val midActionStatic = scala.collection.mutable.Map[String, Double]()
|
|
|
midActionList.foreach {
|
|
|
case (cid, (ts_history, click, conver, income, title)) =>
|
|
@@ -260,7 +225,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
midActionStatic.put("actionstatic_conver_" + cid, conver + midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0))
|
|
|
midActionStatic.put("actionstatic_income_" + cid, income + midActionStatic.getOrDefault("actionstatic_income_" + cid, 0.0))
|
|
|
}
|
|
|
-
|
|
|
if (midTimeDiff.contains("timediff_view_" + cid)) {
|
|
|
featureMap.put("timediff_view", midTimeDiff.getOrDefault("timediff_view_" + cid, 0.0))
|
|
|
}
|
|
@@ -300,7 +264,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0)
|
|
|
))
|
|
|
}
|
|
|
-
|
|
|
val e1: JSONObject = if (record.isNull("e1_feature")) new JSONObject() else
|
|
|
JSON.parseObject(record.getString("e1_feature"))
|
|
|
val e2: JSONObject = if (record.isNull("e2_feature")) new JSONObject() else
|
|
@@ -319,14 +282,12 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
val d1: JSONObject = if (record.isNull("d1_feature")) new JSONObject() else
|
|
|
JSON.parseObject(record.getString("d1_feature"))
|
|
|
val d2: JSONObject = if (record.isNull("d2_feature")) new JSONObject() else
|
|
|
JSON.parseObject(record.getString("d2_feature"))
|
|
|
val d3: JSONObject = if (record.isNull("d3_feature")) new JSONObject() else
|
|
|
JSON.parseObject(record.getString("d3_feature"))
|
|
|
-
|
|
|
if (d1.nonEmpty) {
|
|
|
for (prefix <- List("3h", "6h", "12h", "1d", "3d", "7d")) {
|
|
|
val view = if (!d1.containsKey("ad_view_" + prefix)) 0D else d1.getIntValue("ad_view_" + prefix).toDouble
|
|
@@ -345,7 +306,6 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
// featureMap.put("d1_feature" + "_" + prefix + "_" + "ecpm", f5)
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
val vidRankMaps = scala.collection.mutable.Map[String, scala.collection.immutable.Map[String, Double]]()
|
|
|
if (d2.nonEmpty) {
|
|
|
d2.foreach(r => {
|
|
@@ -373,9 +333,7 @@ object makedata_ad_34_bucketDataPrint_20241217 {
|
|
|
val score = Similarity.conceptSimilarity(title, vTitle)
|
|
|
featureMap.put("ctitle_vtitle_similarity", score);
|
|
|
}
|
|
|
-
|
|
|
val flag = record.isNull("metafeaturemap")
|
|
|
-
|
|
|
val allfeaturemap = if (record.isNull("allfeaturemap")) new JSONObject() else
|
|
|
JSON.parseObject(record.getString("allfeaturemap"))
|
|
|
val apptype = record.getString("apptype")
|