zhaohaipeng пре 1 месец
родитељ
комит
cfc37abf07

+ 1 - 1
src/main/java/examples/utils/StatisticsUtil.java

@@ -74,7 +74,7 @@ public class StatisticsUtil {
         }
     }
 
-    private static boolean isRecommendScene(String page, String recommendPageType) {
+    public static boolean isRecommendScene(String page, String recommendPageType) {
         if (StringUtils.equals("详情后沉浸页", page)) {
             return true;
         } else if (StringUtils.equals("回流后沉浸页&内页feed", page) && StringUtils.isNotBlank(recommendPageType) && recommendPageType.endsWith("-pages/user-videos-share-recommend-detail")) {

+ 11 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys/v20250218/makedata_recsys_41_data_fu_sample_20250218.scala

@@ -3,7 +3,8 @@ package com.aliyun.odps.spark.examples.makedata_recsys.v20250218
 import com.alibaba.fastjson.JSON
 import com.aliyun.odps.TableSchema
 import com.aliyun.odps.data.Record
-import com.aliyun.odps.spark.examples.myUtils.{MyHdfsUtils, ParamUtils, env}
+import com.aliyun.odps.spark.examples.myUtils.{MyHdfsUtils, ParamUtils}
+import examples.utils.StatisticsUtil
 import org.apache.hadoop.io.compress.GzipCodec
 import org.apache.spark.sql.SparkSession
 import org.xm.Similarity
@@ -31,8 +32,17 @@ object makedata_recsys_41_data_fu_sample_20250218 {
     val savePath = param.getOrElse("savePath", "/dw/recommend/model/41_recsys_sample_data/20250221")
     val fuSampleRate = param.getOrElse("fuSampleRate", "0.05").toDouble
     val whatLabel = param.getOrElse("whatLabel", "is_share")
+    val whatApps = param.getOrElse("whatApps", "0,4,2,32,17,18,21,22,24,25,26,27,28,29,3,30,31,33,34,35,36").split(",").filter(r => r.nonEmpty).toList
 
     val data = sc.textFile(readPath)
+      .filter(line => {
+        val rLine = line.split("\t")
+        val logJson = JSON.parseObject(rLine(0))
+        val page = logJson.getString("page")
+        val recommendPageType = logJson.getString("recommendpagetype")
+
+        whatApps.contains(logJson.getString("apptype")) && StatisticsUtil.isRecommendScene(page, recommendPageType)
+      })
       .filter {
         line => {
           val rLine = line.split("\t")

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys/v20250218/makedata_recsys_41_originData_20250218.scala

@@ -141,7 +141,7 @@ object makedata_recsys_41_originData_20250218 {
 
             //5 处理log key表头。
             val logs = new JSONObject()
-            for (key <- List("apptype", "abcode", "mid", "vid", "page", "recommendpagetype", "level", "ts", "headvideoid")) {
+            for (key <- List("apptype", "abcode", "mid", "vid", "page", "recommendpagetype", "level", "ts", "headvideoid", "flowpool", "level", "hotsencetype")) {
               if (!record.isNull(key)) {
                 logs.put(key, record.getString(key))
               }