|
@@ -51,6 +51,7 @@ object makedata_ad_33_bucketData_20240622 {
|
|
val beginStr = param.getOrElse("beginStr", "20240620")
|
|
val beginStr = param.getOrElse("beginStr", "20240620")
|
|
val endStr = param.getOrElse("endStr", "20240620")
|
|
val endStr = param.getOrElse("endStr", "20240620")
|
|
val repartition = param.getOrElse("repartition", "200").toInt
|
|
val repartition = param.getOrElse("repartition", "200").toInt
|
|
|
|
+ // Comma-separated feature-name prefixes to drop. "".split(",") yields Array(""), and every
+ // name startsWith(""), so empty entries are removed to avoid filtering out all features
+ // when the parameter is absent or contains stray commas.
+ val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
|
|
|
|
|
|
val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
|
|
val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
|
|
for (date <- dateRange) {
|
|
for (date <- dateRange) {
|
|
@@ -84,16 +85,22 @@ object makedata_ad_33_bucketData_20240622 {
|
|
case (label, features) =>
|
|
case (label, features) =>
|
|
val featuresBucket = features.map{
|
|
val featuresBucket = features.map{
|
|
case (name, score) =>
|
|
case (name, score) =>
|
|
- if (score > 1E-8) {
|
|
|
|
- if (bucketsMap.contains(name)){
|
|
|
|
- val (_, buckets) = bucketsMap(name)
|
|
|
|
- val scoreNew = 1.0 / (buckets.length + 1) * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
|
|
|
|
- name + ":" + scoreNew.toString
|
|
|
|
- }else{
|
|
|
|
- name + ":" + score.toString
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
|
|
+ // Drop the feature when its name starts with any configured prefix. Empty prefixes are
+ // ignored because every string startsWith(""), which would discard all features.
+ val ifFilter = filterNames.exists(prefix => prefix.nonEmpty && name.startsWith(prefix))
|
|
|
|
+ if (ifFilter){
|
|
""
|
|
""
|
|
|
|
+ }else{
|
|
|
|
+ if (score > 1E-8) {
|
|
|
|
+ if (bucketsMap.contains(name)) {
|
|
|
|
+ val (_, buckets) = bucketsMap(name)
|
|
|
|
+ val scoreNew = 1.0 / (buckets.length + 1) * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
|
|
|
|
+ name + ":" + scoreNew.toString
|
|
|
|
+ } else {
|
|
|
|
+ name + ":" + score.toString
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ ""
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}.filter(_.nonEmpty)
|
|
}.filter(_.nonEmpty)
|
|
result.add(label + "\t" + featuresBucket.mkString("\t"))
|
|
result.add(label + "\t" + featuresBucket.mkString("\t"))
|