|
@@ -3,12 +3,12 @@ package com.aliyun.odps.spark.examples.makedata_recsys.v20250218
|
|
import com.alibaba.fastjson.JSON
|
|
import com.alibaba.fastjson.JSON
|
|
import com.aliyun.odps.spark.examples.myUtils.{FileUtils, MyDateUtils, MyHdfsUtils, ParamUtils}
|
|
import com.aliyun.odps.spark.examples.myUtils.{FileUtils, MyDateUtils, MyHdfsUtils, ParamUtils}
|
|
import examples.extractor.ExtractorUtils
|
|
import examples.extractor.ExtractorUtils
|
|
|
|
+import examples.utils.RosUtil
|
|
import org.apache.hadoop.io.compress.GzipCodec
|
|
import org.apache.hadoop.io.compress.GzipCodec
|
|
import org.apache.spark.sql.SparkSession
|
|
import org.apache.spark.sql.SparkSession
|
|
|
|
|
|
import scala.collection.JavaConversions._
|
|
import scala.collection.JavaConversions._
|
|
import scala.collection.mutable.ArrayBuffer
|
|
import scala.collection.mutable.ArrayBuffer
|
|
-import examples.utils.RosUtil
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
* ros 多分类特征分桶
|
|
* ros 多分类特征分桶
|
|
@@ -74,7 +74,7 @@ object makedata_recsys_43_ros_multi_data_bucket_20250304 {
|
|
.map {
|
|
.map {
|
|
case (logKey, labelKey, features) =>
|
|
case (logKey, labelKey, features) =>
|
|
val labelJson = JSON.parseObject(labelKey)
|
|
val labelJson = JSON.parseObject(labelKey)
|
|
- val label = RosUtil.multiClassLabel(labelJson, labelKey)
|
|
|
|
|
|
+ val label = RosUtil.multiClassLabel(labelJson, whatLabel)
|
|
(logKey, label, features)
|
|
(logKey, label, features)
|
|
}
|
|
}
|
|
.mapPartitions(row => {
|
|
.mapPartitions(row => {
|