|
@@ -32,6 +32,7 @@ object makedata_i2i_06_itemPred_20241206 {
|
|
|
val savePath = param.getOrElse("savePath", "/dw/recommend/model/56_dssm_i2i_itempredData/")
|
|
|
val project = param.getOrElse("project", "loghubods")
|
|
|
val repartition = param.getOrElse("repartition", "100").toInt
|
|
|
+ val ifFilterCate = param.getOrElse("ifFilterCate", "true").toBoolean
|
|
|
|
|
|
// 2 读取onehot文件
|
|
|
val onehotMap_br = sc.broadcast(
|
|
@@ -126,7 +127,16 @@ object makedata_i2i_06_itemPred_20241206 {
|
|
|
result.add((vid, (feature, feature_action, feature_cate1, feature_cate2)))
|
|
|
}
|
|
|
result.iterator
|
|
|
- }).mapPartitions(row =>{
|
|
|
+ }).filter{
|
|
|
+ case (vid, (feature, feature_action, feature_cate1, feature_cate2)) =>
|
|
|
+ if (ifFilterCate){
|
|
|
+ val cate1 = JSON.parseObject(feature).getOrDefault("category1", "无").toString
|
|
|
+ val cate2 = JSON.parseObject(feature).getOrDefault("category2_1", "无").toString
|
|
|
+ !cate1.equals("无") || !cate2.equals("无")
|
|
|
+ }else{
|
|
|
+ true
|
|
|
+ }
|
|
|
+ }.mapPartitions(row =>{
|
|
|
val result = new ArrayBuffer[String]()
|
|
|
val onehotMap = onehotMap_br.value
|
|
|
val bucketsMap = bucketsMap_br.value
|