|
@@ -20,6 +20,9 @@ import scala.collection.mutable.ArrayBuffer
|
|
|
|
|
|
object makedata_ad_31_originData_20250110 {
|
|
object makedata_ad_31_originData_20250110 {
|
|
val WILSON_ZSCORE = 1.96
|
|
val WILSON_ZSCORE = 1.96
|
|
|
|
+ val CTR_SMOOTH_BETA_FACTOR = 25
|
|
|
|
+ val CVR_SMOOTH_BETA_FACTOR = 10
|
|
|
|
+ val CTCVR_SMOOTH_BETA_FACTOR = 100
|
|
|
|
|
|
def main(args: Array[String]): Unit = {
|
|
def main(args: Array[String]): Unit = {
|
|
val spark = SparkSession
|
|
val spark = SparkSession
|
|
@@ -132,15 +135,16 @@ object makedata_ad_31_originData_20250110 {
|
|
val conver = if (bn.isEmpty) 0D else bn.getIntValue("ad_conversion_" + prefix2).toDouble
|
|
val conver = if (bn.isEmpty) 0D else bn.getIntValue("ad_conversion_" + prefix2).toDouble
|
|
val income = if (bn.isEmpty) 0D else bn.getIntValue("ad_income_" + prefix2).toDouble
|
|
val income = if (bn.isEmpty) 0D else bn.getIntValue("ad_income_" + prefix2).toDouble
|
|
// NOTE(zhoutian):
|
|
// NOTE(zhoutian):
|
|
- // 这里cpc只是为了计算cpm的平滑的工具量,没有实际业务意义,因为cpm并非比率,本身不适合直接计算平滑
|
|
|
|
|
|
+ // 这里cpc只是为了计算cpm的平滑的工具量,没有实际业务意义,因为cpm并非比率,本身不适合直接计算Wilson平滑
|
|
// 不使用cpa的原因是未来可能出现广告采用cpc计费的情况或者无法获取转化量的情况,用点击更为稳定
|
|
// 不使用cpa的原因是未来可能出现广告采用cpc计费的情况或者无法获取转化量的情况,用点击更为稳定
|
|
// 其它几组特征亦采用相同逻辑
|
|
// 其它几组特征亦采用相同逻辑
|
|
|
|
+ // 2025-02-17改为增加固定分母平滑,income实际已经可以直接参与cpm平滑计算
|
|
val cpc = if (click == 0) 0D else income / click
|
|
val cpc = if (click == 0) 0D else income / click
|
|
- val f1 = RankExtractorFeature_20240530.divSmooth1(click, view, WILSON_ZSCORE)
|
|
|
|
- val f2 = RankExtractorFeature_20240530.divSmooth1(conver, view, WILSON_ZSCORE)
|
|
|
|
- val f3 = RankExtractorFeature_20240530.divSmooth1(conver, click, WILSON_ZSCORE)
|
|
|
|
|
|
+ val f1 = RankExtractorFeature_20240530.divSmooth2(click, view, CTR_SMOOTH_BETA_FACTOR)
|
|
|
|
+ val f2 = RankExtractorFeature_20240530.divSmooth2(conver, view, CTCVR_SMOOTH_BETA_FACTOR)
|
|
|
|
+ val f3 = RankExtractorFeature_20240530.divSmooth2(conver, click, CVR_SMOOTH_BETA_FACTOR)
|
|
val f4 = conver
|
|
val f4 = conver
|
|
- val f5 = RankExtractorFeature_20240530.divSmooth1(click, view, WILSON_ZSCORE) * cpc * 1000
|
|
|
|
|
|
+ val f5 = RankExtractorFeature_20240530.divSmooth2(click, view, CTR_SMOOTH_BETA_FACTOR) * cpc * 1000
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctr", f1)
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctr", f1)
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctcvr", f2)
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctcvr", f2)
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "cvr", f3)
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "cvr", f3)
|
|
@@ -164,11 +168,11 @@ object makedata_ad_31_originData_20250110 {
|
|
val conver = if (bn.isEmpty) 0D else bn.getIntValue("ad_conversion_" + prefix2).toDouble
|
|
val conver = if (bn.isEmpty) 0D else bn.getIntValue("ad_conversion_" + prefix2).toDouble
|
|
val income = if (bn.isEmpty) 0D else bn.getIntValue("ad_income_" + prefix2).toDouble
|
|
val income = if (bn.isEmpty) 0D else bn.getIntValue("ad_income_" + prefix2).toDouble
|
|
val cpc = if (click == 0) 0D else income / click
|
|
val cpc = if (click == 0) 0D else income / click
|
|
- val f1 = RankExtractorFeature_20240530.divSmooth1(click, view, WILSON_ZSCORE)
|
|
|
|
- val f2 = RankExtractorFeature_20240530.divSmooth1(conver, view, WILSON_ZSCORE)
|
|
|
|
- val f3 = RankExtractorFeature_20240530.divSmooth1(conver, click, WILSON_ZSCORE)
|
|
|
|
|
|
+ val f1 = RankExtractorFeature_20240530.divSmooth2(click, view, CTR_SMOOTH_BETA_FACTOR)
|
|
|
|
+ val f2 = RankExtractorFeature_20240530.divSmooth2(conver, view, CTCVR_SMOOTH_BETA_FACTOR)
|
|
|
|
+ val f3 = RankExtractorFeature_20240530.divSmooth2(conver, click, CVR_SMOOTH_BETA_FACTOR)
|
|
val f4 = conver
|
|
val f4 = conver
|
|
- val f5 = RankExtractorFeature_20240530.divSmooth1(click, view, WILSON_ZSCORE) * cpc * 1000
|
|
|
|
|
|
+ val f5 = RankExtractorFeature_20240530.divSmooth2(click, view, CTR_SMOOTH_BETA_FACTOR) * cpc * 1000
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctr", f1)
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctr", f1)
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctcvr", f2)
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctcvr", f2)
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "cvr", f3)
|
|
featureMap.put(prefix1 + "_" + prefix2 + "_" + "cvr", f3)
|
|
@@ -303,11 +307,11 @@ object makedata_ad_31_originData_20250110 {
|
|
val conver = if (!d1.containsKey("ad_conversion_" + prefix)) 0D else d1.getIntValue("ad_conversion_" + prefix).toDouble
|
|
val conver = if (!d1.containsKey("ad_conversion_" + prefix)) 0D else d1.getIntValue("ad_conversion_" + prefix).toDouble
|
|
val income = if (!d1.containsKey("ad_income_" + prefix)) 0D else d1.getIntValue("ad_income_" + prefix).toDouble
|
|
val income = if (!d1.containsKey("ad_income_" + prefix)) 0D else d1.getIntValue("ad_income_" + prefix).toDouble
|
|
val cpc = if (click == 0) 0D else income / click
|
|
val cpc = if (click == 0) 0D else income / click
|
|
- val f1 = RankExtractorFeature_20240530.divSmooth1(click, view, WILSON_ZSCORE)
|
|
|
|
- val f2 = RankExtractorFeature_20240530.divSmooth1(conver, view, WILSON_ZSCORE)
|
|
|
|
- val f3 = RankExtractorFeature_20240530.divSmooth1(conver, click, WILSON_ZSCORE)
|
|
|
|
|
|
+ val f1 = RankExtractorFeature_20240530.divSmooth2(click, view, CTR_SMOOTH_BETA_FACTOR)
|
|
|
|
+ val f2 = RankExtractorFeature_20240530.divSmooth2(conver, view, CTCVR_SMOOTH_BETA_FACTOR)
|
|
|
|
+ val f3 = RankExtractorFeature_20240530.divSmooth2(conver, click, CVR_SMOOTH_BETA_FACTOR)
|
|
val f4 = conver
|
|
val f4 = conver
|
|
- val f5 = RankExtractorFeature_20240530.divSmooth1(click, view, WILSON_ZSCORE) * cpc * 1000
|
|
|
|
|
|
+ val f5 = RankExtractorFeature_20240530.divSmooth2(click, view, CTR_SMOOTH_BETA_FACTOR) * cpc * 1000
|
|
featureMap.put("d1_feature" + "_" + prefix + "_" + "ctr", f1)
|
|
featureMap.put("d1_feature" + "_" + prefix + "_" + "ctr", f1)
|
|
featureMap.put("d1_feature" + "_" + prefix + "_" + "ctcvr", f2)
|
|
featureMap.put("d1_feature" + "_" + prefix + "_" + "ctcvr", f2)
|
|
featureMap.put("d1_feature" + "_" + prefix + "_" + "cvr", f3)
|
|
featureMap.put("d1_feature" + "_" + prefix + "_" + "cvr", f3)
|