@@ -8,6 +8,7 @@ import examples.extractor.ExtractorUtils
 import examples.extractor.v20250218.ExtractFeature20250218
 import examples.utils.{FestiveUtil, SimilarityUtils, StatisticsUtil}
 import org.apache.hadoop.io.compress.GzipCodec
+import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.SparkSession

 import java.util
@@ -51,9 +52,10 @@ object makedata_recsys_41_str_train_data_sample_20250319 {
       s"dt=$dt,hh=$hh"
     }

-    val odpsData = partitions.map { partition => {
+    var odpsData: RDD[String] = sc.emptyRDD[String] // initialize an empty RDD
+    for (partition <- partitions) {
       println(s"Start reading partition: $partition")
-      odpsOps.readTable(
+      val partitionData = odpsOps.readTable(
         project = project,
         table = table,
         partition = partition,
@@ -173,8 +175,8 @@ object makedata_recsys_41_str_train_data_sample_20250319 {

       })
     })
+      odpsData = odpsData.union(partitionData)
     }
-    }.reduce(_ union _)
     println(s"odps count: " + odpsData.count())
     // 4. Save data to HDFS
     val hdfsPath = savePath
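
The refactor above replaces `partitions.map { ... }.reduce(_ union _)` with an empty-RDD seed plus an incremental `union` in a for-loop, which avoids the exception that Scala's `reduce` throws on an empty partition list. Below is a minimal, self-contained sketch of the same pattern, with the ODPS read stubbed out via `parallelize`; the names `partitions` and `readPartition` are illustrative stand-ins, not the PR's actual helpers.

    import org.apache.spark.rdd.RDD
    import org.apache.spark.sql.SparkSession

    object UnionLoopSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local[*]")
          .appName("union-loop-sketch")
          .getOrCreate()
        val sc = spark.sparkContext

        val partitions = Seq("dt=20250319,hh=00", "dt=20250319,hh=01")

        // Stand-in for odpsOps.readTable(...): one RDD[String] per partition spec.
        def readPartition(p: String): RDD[String] =
          sc.parallelize(Seq(s"row-a of $p", s"row-b of $p"))

        // Pattern used in the diff: seed with an empty RDD, union incrementally.
        // Unlike partitions.map(...).reduce(_ union _), this is safe when
        // `partitions` is empty.
        var odpsData: RDD[String] = sc.emptyRDD[String]
        for (p <- partitions) {
          println(s"Start reading partition: $p")
          odpsData = odpsData.union(readPartition(p))
        }
        println("count: " + odpsData.count())

        // Equivalent, flatter alternative: a single union over all per-partition
        // RDDs, which keeps the lineage shallow for large partition lists.
        val flat: RDD[String] = sc.union(partitions.map(readPartition))
        println("flat count: " + flat.count())

        spark.stop()
      }
    }

One design note: pairwise `union` in a loop builds one `UnionRDD` node per iteration, so the lineage depth grows with the number of partitions read; `sc.union(Seq[RDD])` produces a single `UnionRDD` over all inputs, which is preferable when the partition list is long.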