|
@@ -74,7 +74,9 @@ object makedata_16_bucketData_20240609 {
|
|
val logKey = rList(0)
|
|
val logKey = rList(0)
|
|
val labelKey = rList(1)
|
|
val labelKey = rList(1)
|
|
val features = rList(2).split(",").map(_.toDouble)
|
|
val features = rList(2).split(",").map(_.toDouble)
|
|
- (logKey, labelKey, features)
|
|
|
|
|
|
+ val allFeature: JSONObject = if (rList(3).equals("\\\\N")) new JSONObject() else
|
|
|
|
+ JSON.parseObject(rList(3))
|
|
|
|
+ (logKey, labelKey, features, allFeatureKey)
|
|
})
|
|
})
|
|
.filter{
|
|
.filter{
|
|
case (logKey, labelKey, features) =>
|
|
case (logKey, labelKey, features) =>
|
|
@@ -97,12 +99,17 @@ object makedata_16_bucketData_20240609 {
|
|
val featuresBucket = contentList.indices.map(i =>{
|
|
val featuresBucket = contentList.indices.map(i =>{
|
|
val featureName = contentList(i)
|
|
val featureName = contentList(i)
|
|
val score = features(i)
|
|
val score = features(i)
|
|
- if (score > 1E-8){
|
|
|
|
- val (bucketNum, buckets) = bucketsMap(featureName)
|
|
|
|
- val scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
|
|
|
|
- featureName + ":" + scoreNew.toString
|
|
|
|
- }else{
|
|
|
|
- ""
|
|
|
|
|
|
+ // 用户
|
|
|
|
+ if (featureName.startsWith("c")) {
|
|
|
|
+ allFeature.getOrDefault(featureName, "0").toString
|
|
|
|
+ } else {
|
|
|
|
+ if (score > 1E-8) {
|
|
|
|
+ val (bucketNum, buckets) = bucketsMap(featureName)
|
|
|
|
+ val scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score).toDouble + 1.0)
|
|
|
|
+ featureName + ":" + scoreNew.toString
|
|
|
|
+ } else {
|
|
|
|
+ ""
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}).filter(_.nonEmpty)
|
|
}).filter(_.nonEmpty)
|
|
result.add(label + "\t" + featuresBucket.mkString("\t"))
|
|
result.add(label + "\t" + featuresBucket.mkString("\t"))
|