浏览代码

Update makedata_ad_31_originData_20250110: change smooth method

StrayWarrior 2 月之前
父节点
当前提交
eaa6fafe6d

+ 17 - 13
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_31_originData_20250110.scala

@@ -20,6 +20,9 @@ import scala.collection.mutable.ArrayBuffer
 
 object makedata_ad_31_originData_20250110 {
   val WILSON_ZSCORE = 1.96
+  val CTR_SMOOTH_BETA_FACTOR = 25
+  val CVR_SMOOTH_BETA_FACTOR = 10
+  val CTCVR_SMOOTH_BETA_FACTOR = 100
 
   def main(args: Array[String]): Unit = {
     val spark = SparkSession
@@ -132,15 +135,16 @@ object makedata_ad_31_originData_20250110 {
                 val conver = if (bn.isEmpty) 0D else bn.getIntValue("ad_conversion_" + prefix2).toDouble
                 val income = if (bn.isEmpty) 0D else bn.getIntValue("ad_income_" + prefix2).toDouble
                 // NOTE(zhoutian):
-                // 这里cpc只是为了计算cpm的平滑的工具量,没有实际业务意义,因为cpm并非比率,本身不适合直接计算平滑
+                // 这里cpc只是为了计算cpm的平滑的工具量,没有实际业务意义,因为cpm并非比率,本身不适合直接计算Wilson平滑
                 // 不使用cpa的原因是未来可能出现广告采用cpc计费的情况或者无法获取转化量的情况,用点击更为稳定
                 // 其它几组特征亦采用相同逻辑
+                // 2025-02-17改为增加固定分母平滑(divSmooth2),income实际已经可以直接参与cpm平滑计算;但当前实现仍沿用"平滑后的ctr * cpc * 1000"的写法
                 val cpc = if (click == 0) 0D else income / click
-                val f1 = RankExtractorFeature_20240530.divSmooth1(click, view, WILSON_ZSCORE)
-                val f2 = RankExtractorFeature_20240530.divSmooth1(conver, view, WILSON_ZSCORE)
-                val f3 = RankExtractorFeature_20240530.divSmooth1(conver, click, WILSON_ZSCORE)
+                val f1 = RankExtractorFeature_20240530.divSmooth2(click, view, CTR_SMOOTH_BETA_FACTOR)
+                val f2 = RankExtractorFeature_20240530.divSmooth2(conver, view, CTCVR_SMOOTH_BETA_FACTOR)
+                val f3 = RankExtractorFeature_20240530.divSmooth2(conver, click, CVR_SMOOTH_BETA_FACTOR)
                 val f4 = conver
-                val f5 = RankExtractorFeature_20240530.divSmooth1(click, view, WILSON_ZSCORE) * cpc * 1000
+                val f5 = RankExtractorFeature_20240530.divSmooth2(click, view, CTR_SMOOTH_BETA_FACTOR) * cpc * 1000
                 featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctr", f1)
                 featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctcvr", f2)
                 featureMap.put(prefix1 + "_" + prefix2 + "_" + "cvr", f3)
@@ -164,11 +168,11 @@ object makedata_ad_31_originData_20250110 {
                 val conver = if (bn.isEmpty) 0D else bn.getIntValue("ad_conversion_" + prefix2).toDouble
                 val income = if (bn.isEmpty) 0D else bn.getIntValue("ad_income_" + prefix2).toDouble
                 val cpc = if (click == 0) 0D else income / click
-                val f1 = RankExtractorFeature_20240530.divSmooth1(click, view, WILSON_ZSCORE)
-                val f2 = RankExtractorFeature_20240530.divSmooth1(conver, view, WILSON_ZSCORE)
-                val f3 = RankExtractorFeature_20240530.divSmooth1(conver, click, WILSON_ZSCORE)
+                val f1 = RankExtractorFeature_20240530.divSmooth2(click, view, CTR_SMOOTH_BETA_FACTOR)
+                val f2 = RankExtractorFeature_20240530.divSmooth2(conver, view, CTCVR_SMOOTH_BETA_FACTOR)
+                val f3 = RankExtractorFeature_20240530.divSmooth2(conver, click, CVR_SMOOTH_BETA_FACTOR)
                 val f4 = conver
-                val f5 = RankExtractorFeature_20240530.divSmooth1(click, view, WILSON_ZSCORE) * cpc * 1000
+                val f5 = RankExtractorFeature_20240530.divSmooth2(click, view, CTR_SMOOTH_BETA_FACTOR) * cpc * 1000
                 featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctr", f1)
                 featureMap.put(prefix1 + "_" + prefix2 + "_" + "ctcvr", f2)
                 featureMap.put(prefix1 + "_" + prefix2 + "_" + "cvr", f3)
@@ -303,11 +307,11 @@ object makedata_ad_31_originData_20250110 {
                 val conver = if (!d1.containsKey("ad_conversion_" + prefix)) 0D else d1.getIntValue("ad_conversion_" + prefix).toDouble
                 val income = if (!d1.containsKey("ad_income_" + prefix)) 0D else d1.getIntValue("ad_income_" + prefix).toDouble
                 val cpc = if (click == 0) 0D else income / click
-                val f1 = RankExtractorFeature_20240530.divSmooth1(click, view, WILSON_ZSCORE)
-                val f2 = RankExtractorFeature_20240530.divSmooth1(conver, view, WILSON_ZSCORE)
-                val f3 = RankExtractorFeature_20240530.divSmooth1(conver, click, WILSON_ZSCORE)
+                val f1 = RankExtractorFeature_20240530.divSmooth2(click, view, CTR_SMOOTH_BETA_FACTOR)
+                val f2 = RankExtractorFeature_20240530.divSmooth2(conver, view, CTCVR_SMOOTH_BETA_FACTOR)
+                val f3 = RankExtractorFeature_20240530.divSmooth2(conver, click, CVR_SMOOTH_BETA_FACTOR)
                 val f4 = conver
-                val f5 = RankExtractorFeature_20240530.divSmooth1(click, view, WILSON_ZSCORE) * cpc * 1000
+                val f5 = RankExtractorFeature_20240530.divSmooth2(click, view, CTR_SMOOTH_BETA_FACTOR) * cpc * 1000
                 featureMap.put("d1_feature" + "_" + prefix + "_" + "ctr", f1)
                 featureMap.put("d1_feature" + "_" + prefix + "_" + "ctcvr", f2)
                 featureMap.put("d1_feature" + "_" + prefix + "_" + "cvr", f3)