|
@@ -151,16 +151,10 @@ object makedata_ad_33_bucketDataFromOriginToHive_20260120 {
|
|
|
val adverId = record.getString("adverid")
|
|
val adverId = record.getString("adverid")
|
|
|
!filterAdverIds.contains(adverId)
|
|
!filterAdverIds.contains(adverId)
|
|
|
})
|
|
})
|
|
|
- .map(record => {
|
|
|
|
|
- val pqtid = record.getString("pqtid")
|
|
|
|
|
- (pqtid, record)
|
|
|
|
|
- })
|
|
|
|
|
- .reduceByKey((a, b) => a)
|
|
|
|
|
- .map(_._2)
|
|
|
|
|
- .filter(record => {
|
|
|
|
|
- val label = record.getString(whatLabel).toInt
|
|
|
|
|
- label > 0 || Random.nextDouble() < negSampleRate
|
|
|
|
|
- })
|
|
|
|
|
|
|
+ // .filter(record => {
|
|
|
|
|
+ // val label = record.getString(whatLabel).toInt
|
|
|
|
|
+ // label > 0 || Random.nextDouble() < negSampleRate
|
|
|
|
|
+ // })
|
|
|
.map(record => {
|
|
.map(record => {
|
|
|
val featureMap = new JSONObject()
|
|
val featureMap = new JSONObject()
|
|
|
val ts = record.getString("ts").toInt
|
|
val ts = record.getString("ts").toInt
|
|
@@ -660,6 +654,17 @@ object makedata_ad_33_bucketDataFromOriginToHive_20260120 {
|
|
|
val labelKey = labels.toString()
|
|
val labelKey = labels.toString()
|
|
|
(logKey, labelKey, featureMap)
|
|
(logKey, labelKey, featureMap)
|
|
|
})
|
|
})
|
|
|
|
|
+ .map { case (logKey, labelKey, jsons) =>
|
|
|
|
|
+ val pqtid = jsons.getString("pqtid")
|
|
|
|
|
+ (pqtid, (logKey, labelKey, jsons))
|
|
|
|
|
+ }
|
|
|
|
|
+ .reduceByKey((a, b) => a)
|
|
|
|
|
+ .map(_._2)
|
|
|
|
|
+ .filter { case (logKey, labelKey, jsons) =>
|
|
|
|
|
+ val labelObject = JSON.parseObject(labelKey)
|
|
|
|
|
+ val label = labelObject.getString("ad_is_conversion").toInt
|
|
|
|
|
+ label > 0 || Random.nextDouble() < negSampleRate
|
|
|
|
|
+ }
|
|
|
odpsData
|
|
odpsData
|
|
|
}).reduce(_ union _)
|
|
}).reduce(_ union _)
|
|
|
.map { case (logKey, labelKey, jsons) =>
|
|
.map { case (logKey, labelKey, jsons) =>
|