|
@@ -1,4 +1,4 @@
|
|
|
-package com.aliyun.odps.spark.zhp
|
|
|
+package com.aliyun.odps.spark.zhp.makedata_ad
|
|
|
|
|
|
import com.alibaba.fastjson.JSON
|
|
|
import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils}
|
|
@@ -24,7 +24,7 @@ object makedata_ad_33_bucketData_20240622 {
|
|
|
|
|
|
val loader = getClass.getClassLoader
|
|
|
|
|
|
- val resourceUrlBucket = loader.getResource("20240622_ad_bucket_249.txt")
|
|
|
+ val resourceUrlBucket = loader.getResource("20240624_ad_bucket_249.txt")
|
|
|
val buckets =
|
|
|
if (resourceUrlBucket != null) {
|
|
|
val buckets = Source.fromURL(resourceUrlBucket).getLines().mkString("\n")
|
|
@@ -51,6 +51,7 @@ object makedata_ad_33_bucketData_20240622 {
|
|
|
val beginStr = param.getOrElse("beginStr", "20240620")
|
|
|
val endStr = param.getOrElse("endStr", "20240620")
|
|
|
val repartition = param.getOrElse("repartition", "200").toInt
|
|
|
+ // An absent or empty "filterNames" must yield an empty set: "".split(",") returns Array(""),
+ // and every feature name starts with "", so a blank token would filter out every feature.
+ val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
|
|
|
|
|
|
val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
|
|
|
for (date <- dateRange) {
|
|
@@ -84,16 +85,24 @@ object makedata_ad_33_bucketData_20240622 {
|
|
|
case (label, features) =>
|
|
|
val featuresBucket = features.map{
|
|
|
case (name, score) =>
|
|
|
- if (score > 1E-8) {
|
|
|
- if (bucketsMap.contains(name)){
|
|
|
- val (_, buckets) = bucketsMap(name)
|
|
|
- val scoreNew = 1.0 / (buckets.length + 1) * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
|
|
|
- name + ":" + scoreNew.toString
|
|
|
- }else{
|
|
|
- name + ":" + score.toString
|
|
|
- }
|
|
|
- } else {
|
|
|
+ // A feature is dropped when its name starts with any configured prefix.
+ // exists is false for an empty set and stops at the first matching prefix.
+ val ifFilter = filterNames.exists(prefix => name.startsWith(prefix))
|
|
|
+ if (ifFilter){
|
|
|
""
|
|
|
+ }else{
|
|
|
+ if (score > 1E-8) {
|
|
|
+ if (bucketsMap.contains(name)) {
|
|
|
+ val (_, buckets) = bucketsMap(name)
|
|
|
+ val scoreNew = 1.0 / (buckets.length + 1) * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
|
|
|
+ name + ":" + scoreNew.toString
|
|
|
+ } else {
|
|
|
+ name + ":" + score.toString
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ ""
|
|
|
+ }
|
|
|
}
|
|
|
}.filter(_.nonEmpty)
|
|
|
result.add(label + "\t" + featuresBucket.mkString("\t"))
|