jch 5 月之前
父節點
當前提交
0b30aca406

+ 6 - 0
src/main/java/examples/extractor/RankExtractorFeature_20240530.java

@@ -8,6 +8,12 @@ public class RankExtractorFeature_20240530 {
         }
         return a / b;
     }
+    public static Double smoothDiv(double a, double b, double plus){ // Smoothed ratio: returns a / (b + plus), or 0 when a or b is exactly zero.
+        if (a == 0 || b == 0){ // Guard tests the raw b, so b == 0 yields 0 regardless of plus.
+            return 0D;
+        }
+        return a / (b + plus); // plus is added to the denominator only. NOTE(review): a is non-zero here, so b + plus == 0 gives an infinite result - confirm callers never pass plus == -b.
+    }
     public static Double calLog(double a){
         if (a <= 0){
             return 0D;

+ 24 - 12
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_61_originData_20241209.scala

@@ -38,6 +38,7 @@ object makedata_recsys_61_originData_20241209 {
     val repartition = param.getOrElse("repartition", "32").toInt
     val whatLabel = param.getOrElse("whatLabel", "is_share")
     val fuSampleRate = param.getOrElse("fuSampleRate", "0.1").toDouble
+    val smoothPlus = param.getOrElse("smoothPlus", "5").toDouble
 
     // 2 odps
     val odpsOps = env.getODPS(sc)
@@ -92,12 +93,14 @@ object makedata_recsys_61_originData_20241209 {
                 val exp = if (b_1.isEmpty) 0D else b_1.getIntValue("exp_pv_" + prefix2).toDouble
                 val share = if (b_2.isEmpty) 0D else b_2.getIntValue("share_pv_" + prefix2).toDouble
                 val returns = if (b_3.isEmpty) 0D else b_3.getIntValue("return_uv_" + prefix2).toDouble
-                val f1 = RankExtractorFeature_20240530.calDiv(share, exp)
+                val f0 = RankExtractorFeature_20240530.calLog(exp)
+                val f1 = RankExtractorFeature_20240530.smoothDiv(share, exp, smoothPlus)
                 val f2 = RankExtractorFeature_20240530.calLog(share)
-                val f3 = RankExtractorFeature_20240530.calDiv(returns, exp)
+                val f3 = RankExtractorFeature_20240530.smoothDiv(returns, exp, smoothPlus)
                 val f4 = RankExtractorFeature_20240530.calLog(returns)
                 val f5 = f3 * f4
-                val f6 = RankExtractorFeature_20240530.calDiv(returns, share)
+                val f6 = RankExtractorFeature_20240530.smoothDiv(returns, share, smoothPlus)
+                featureMap.put(prefix1 + "_" + prefix2 + "_" + "EXP", f0)
                 featureMap.put(prefix1 + "_" + prefix2 + "_" + "STR", f1)
                 featureMap.put(prefix1 + "_" + prefix2 + "_" + "log(share)", f2)
                 featureMap.put(prefix1 + "_" + prefix2 + "_" + "ROV", f3)
@@ -195,25 +198,34 @@ object makedata_recsys_61_originData_20241209 {
             )
             for ((featType, featPeriod, featData) <- vidStatFeat) {
               for (period <- featPeriod) {
-                // val view = if (featData.isEmpty) 0D else featData.getDoubleValue("view_" + period)
+                val view = if (featData.isEmpty) 0D else featData.getDoubleValue("view_" + period)
                 val share = if (featData.isEmpty) 0D else featData.getDoubleValue("share_" + period)
                 val return_ = if (featData.isEmpty) 0D else featData.getDoubleValue("return_" + period)
                 val view_hasreturn = if (featData.isEmpty) 0D else featData.getDoubleValue("view_hasreturn_" + period)
                 val share_hasreturn = if (featData.isEmpty) 0D else featData.getDoubleValue("share_hasreturn_" + period)
-                val ros = if (featData.isEmpty) 0D else featData.getDoubleValue("ros_" + period)
-                val rov = if (featData.isEmpty) 0D else featData.getDoubleValue("rov_" + period)
-                val r_cnt = if (featData.isEmpty) 0D else featData.getDoubleValue("r_cnt_" + period)
-                val r_rate = if (featData.isEmpty) 0D else featData.getDoubleValue("r_rate_" + period)
-                val r_cnt4s = if (featData.isEmpty) 0D else featData.getDoubleValue("r_cnt4s_" + period)
-                val str = if (featData.isEmpty) 0D else featData.getDoubleValue("str_" + period)
+                var ros = if (featData.isEmpty) 0D else featData.getDoubleValue("ros_" + period)
+                var rov = if (featData.isEmpty) 0D else featData.getDoubleValue("rov_" + period)
+                var r_cnt = if (featData.isEmpty) 0D else featData.getDoubleValue("r_cnt_" + period)
+                var r_rate = if (featData.isEmpty) 0D else featData.getDoubleValue("r_rate_" + period)
+                var r_cnt4s = if (featData.isEmpty) 0D else featData.getDoubleValue("r_cnt4s_" + period)
+                var str = if (featData.isEmpty) 0D else featData.getDoubleValue("str_" + period)
+                if (smoothPlus > 0) {
+                  ros = RankExtractorFeature_20240530.smoothDiv(return_, share, smoothPlus)
+                  rov = RankExtractorFeature_20240530.smoothDiv(return_, view, smoothPlus)
+                  r_cnt = RankExtractorFeature_20240530.smoothDiv(return_, view_hasreturn, smoothPlus)
+                  r_rate = RankExtractorFeature_20240530.smoothDiv(view_hasreturn, view, smoothPlus)
+                  r_cnt4s = RankExtractorFeature_20240530.smoothDiv(return_, share_hasreturn, smoothPlus)
+                  str = RankExtractorFeature_20240530.smoothDiv(share, view, smoothPlus)
+                }
+
                 // scale
-                // val view_s = RankExtractorFeature_20240530.calLog(view)
+                val view_s = RankExtractorFeature_20240530.calLog(view)
                 val share_s = RankExtractorFeature_20240530.calLog(share)
                 val return_s = RankExtractorFeature_20240530.calLog(return_)
                 val view_hasreturn_s = RankExtractorFeature_20240530.calLog(view_hasreturn)
                 val share_hasreturn_s = RankExtractorFeature_20240530.calLog(share_hasreturn)
 
-                // featureMap.put(featType + "_" + period + "_" + "view", view_s)
+                featureMap.put(featType + "_" + period + "_" + "view", view_s)
                 featureMap.put(featType + "_" + period + "_" + "share", share_s)
                 featureMap.put(featType + "_" + period + "_" + "return", return_s)
                 featureMap.put(featType + "_" + period + "_" + "view_hasreturn", view_hasreturn_s)