|
@@ -24,7 +24,7 @@ object makedata_ad_33_bucketData_20240622 {
|
|
|
|
|
|
val loader = getClass.getClassLoader
|
|
|
|
|
|
- val resourceUrlBucket = loader.getResource("20240622_ad_bucket_249.txt")
|
|
|
+ val resourceUrlBucket = loader.getResource("20240704_ad_bucket_351.txt")
|
|
|
val buckets =
|
|
|
if (resourceUrlBucket != null) {
|
|
|
val buckets = Source.fromURL(resourceUrlBucket).getLines().mkString("\n")
|
|
@@ -50,8 +50,9 @@ object makedata_ad_33_bucketData_20240622 {
|
|
|
val savePath = param.getOrElse("savePath", "/dw/recommend/model/33_ad_train_data/")
|
|
|
val beginStr = param.getOrElse("beginStr", "20240620")
|
|
|
val endStr = param.getOrElse("endStr", "20240620")
|
|
|
- val repartition = param.getOrElse("repartition", "200").toInt
|
|
|
+ val repartition = param.getOrElse("repartition", "100").toInt
|
|
|
val filterNames = param.getOrElse("filterNames", "").split(",").toSet
|
|
|
+ val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
|
|
|
|
|
|
val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
|
|
|
for (date <- dateRange) {
|
|
@@ -71,11 +72,11 @@ object makedata_ad_33_bucketData_20240622 {
|
|
|
case (logKey, labelKey, features) =>
|
|
|
val logKeyList = logKey.split(",")
|
|
|
val apptype = logKeyList(0)
|
|
|
- !Set("12").contains(apptype)
|
|
|
+ !Set("12", "13").contains(apptype)
|
|
|
}
|
|
|
.map{
|
|
|
case (logKey, labelKey, features) =>
|
|
|
- val label = JSON.parseObject(labelKey).getOrDefault("ad_is_conversion", "0").toString
|
|
|
+ val label = JSON.parseObject(labelKey).getOrDefault(whatLabel, "0").toString
|
|
|
(label, features)
|
|
|
}
|
|
|
.mapPartitions(row => {
|
|
@@ -94,8 +95,8 @@ object makedata_ad_33_bucketData_20240622 {
|
|
|
}else{
|
|
|
if (score > 1E-8) {
|
|
|
if (bucketsMap.contains(name)) {
|
|
|
- val (_, buckets) = bucketsMap(name)
|
|
|
- val scoreNew = 1.0 / (buckets.length + 1) * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
|
|
|
+ val (bucketsNum, buckets) = bucketsMap(name)
|
|
|
+ val scoreNew = 1.0 / bucketsNum * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
|
|
|
name + ":" + scoreNew.toString
|
|
|
} else {
|
|
|
name + ":" + score.toString
|