瀏覽代碼

按 page 字段过滤样本数据（仅保留"详情后沉浸页"、"回流后沉浸页&内页feed"、"首页feed"）

jch 4 天之前
父節點
當前提交
759bab8b59

+ 6 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys_r_rate/makedata_recsys_83_originData_20250317.scala

@@ -1,6 +1,7 @@
 package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
 
 import com.aliyun.odps.data.Record
+import com.aliyun.odps.spark.examples.myUtils.DataUtils.getStringValue
 import com.aliyun.odps.spark.examples.myUtils._
 import examples.utils.SimilarityUtils
 import org.apache.spark.rdd.RDD
@@ -151,7 +152,12 @@ object makedata_recsys_83_originData_20250317 {
         })
 
       // 2.4 加载样本数据
+      val pageSet = Set("详情后沉浸页", "回流后沉浸页&内页feed", "首页feed")
       val odpsData = DataUtils.getODPSData(sc, project, table, partition, tablePart)
+        .filter(record => {
+          val page = getStringValue(record, "page")
+          pageSet.contains(page)
+        })
 
       // 2.5 样本重采样
       val filterColumns = Set("allfeaturemap", "metafeaturemap")