|
@@ -1,6 +1,7 @@
|
|
|
package com.aliyun.odps.spark.examples.makedata_recsys_r_rate
|
|
|
|
|
|
import com.aliyun.odps.data.Record
|
|
|
+import com.aliyun.odps.spark.examples.myUtils.DataUtils.getStringValue
|
|
|
import com.aliyun.odps.spark.examples.myUtils._
|
|
|
import examples.utils.SimilarityUtils
|
|
|
import org.apache.spark.rdd.RDD
|
|
@@ -151,7 +152,12 @@ object makedata_recsys_83_originData_20250317 {
|
|
|
})
|
|
|
|
|
|
// 2.4 加载样本数据
|
|
|
+ val pageSet = Set("详情后沉浸页", "回流后沉浸页&内页feed", "首页feed")
|
|
|
val odpsData = DataUtils.getODPSData(sc, project, table, partition, tablePart)
|
|
|
+ .filter(record => {
|
|
|
+ val page = getStringValue(record, "page")
|
|
|
+ pageSet.contains(page)
|
|
|
+ })
|
|
|
|
|
|
// 2.5 样本重采样
|
|
|
val filterColumns = Set("allfeaturemap", "metafeaturemap")
|