|
@@ -21,8 +21,18 @@ object makedata_ad_32_bucket_20240718 {
|
|
.getOrCreate()
|
|
.getOrCreate()
|
|
val sc = spark.sparkContext
|
|
val sc = spark.sparkContext
|
|
|
|
|
|
|
|
+ // 1 读取参数
|
|
|
|
+ val param = ParamUtils.parseArgs(args)
|
|
|
|
+ val readPath = param.getOrElse("readPath", "/dw/recommend/model/31_ad_sample_data/20240620*")
|
|
|
|
+ val savePath = param.getOrElse("savePath", "/dw/recommend/model/32_bucket_data/")
|
|
|
|
+ val fileName = param.getOrElse("fileName", "20240620_100")
|
|
|
|
+ val sampleRate = param.getOrElse("sampleRate", "1.0").toDouble
|
|
|
|
+ val bucketNum = param.getOrElse("bucketNum", "100").toInt
|
|
|
|
+ val featureNameFile = param.getOrElse("featureNameFile", "20240718_ad_feature_name.txt");
|
|
|
|
+
|
|
|
|
+
|
|
val loader = getClass.getClassLoader
|
|
val loader = getClass.getClassLoader
|
|
- val resourceUrl = loader.getResource("20240703_ad_feature_name.txt")
|
|
|
|
|
|
+ val resourceUrl = loader.getResource(featureNameFile)
|
|
val content =
|
|
val content =
|
|
if (resourceUrl != null) {
|
|
if (resourceUrl != null) {
|
|
val content = Source.fromURL(resourceUrl).getLines().mkString("\n")
|
|
val content = Source.fromURL(resourceUrl).getLines().mkString("\n")
|
|
@@ -38,14 +48,6 @@ object makedata_ad_32_bucket_20240718 {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
- // 1 读取参数
|
|
|
|
- val param = ParamUtils.parseArgs(args)
|
|
|
|
- val readPath = param.getOrElse("readPath", "/dw/recommend/model/31_ad_sample_data/20240620*")
|
|
|
|
- val savePath = param.getOrElse("savePath", "/dw/recommend/model/32_bucket_data/")
|
|
|
|
- val fileName = param.getOrElse("fileName", "20240620_100")
|
|
|
|
- val sampleRate = param.getOrElse("sampleRate", "1.0").toDouble
|
|
|
|
- val bucketNum = param.getOrElse("bucketNum", "100").toInt
|
|
|
|
-
|
|
|
|
val data = sc.textFile(readPath)
|
|
val data = sc.textFile(readPath)
|
|
println("问题数据数量:" + data.filter(r=>r.split("\t").length != 3).count())
|
|
println("问题数据数量:" + data.filter(r=>r.split("\t").length != 3).count())
|
|
val data1 = data.map(r => {
|
|
val data1 = data.map(r => {
|