소스 검색

feat:过滤没有回传广告主的样本

zhaohaipeng 7 달 전
부모
커밋
39350d5367
1개의 변경된 파일, 12개의 추가 및 7개의 삭제
  1. 12 7
      src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_31_originData_20240718.scala

+ 12 - 7
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_31_originData_20240718.scala

@@ -12,6 +12,7 @@ import org.xm.Similarity
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable.ArrayBuffer
+
 /*
    20240608 提取特征
  */
@@ -44,25 +45,29 @@ object makedata_ad_31_originData_20240718 {
       val dt = dt_hh.substring(0, 8)
       val hh = dt_hh.substring(8, 10)
       val partition = s"dt=$dt,hh=$hh"
-      if (filterHours.nonEmpty && filterHours.contains(hh)){
+      if (filterHours.nonEmpty && filterHours.contains(hh)) {
         println("不执行partiton:" + partition)
-      }else{
+      } else {
         println("开始执行partiton:" + partition)
         val odpsData = odpsOps.readTable(project = project,
             table = table,
             partition = partition,
             transfer = func,
             numPartition = tablePart)
+          .filter(record => {
+            val extendAlg: JSONObject = if (record.isNull("extend_alg")) new JSONObject() else
+              JSON.parseObject(record.getString("extend_alg"))
+            val isApi = extendAlg.getString("is_api")
+            "1".equals(isApi)
+          })
           .map(record => {
 
-
             val ts = record.getString("ts").toInt
             val cid = record.getString("cid")
             val apptype = record.getString("apptype")
             val extend: JSONObject = if (record.isNull("extend")) new JSONObject() else
               JSON.parseObject(record.getString("extend"))
 
-
             val featureMap = new JSONObject()
 
             val b1: JSONObject = if (record.isNull("b1_feature")) new JSONObject() else
@@ -112,7 +117,7 @@ object makedata_ad_31_originData_20240718 {
             if (b1.containsKey("cpa")) {
               featureMap.put("cpa", b1.getString("cpa").toDouble)
             }
-            if (b1.containsKey("weight") && b1.getString("weight").nonEmpty){
+            if (b1.containsKey("weight") && b1.getString("weight").nonEmpty) {
               featureMap.put("weight", b1.getString("weight").toDouble)
             }
 
@@ -326,8 +331,8 @@ object makedata_ad_31_originData_20240718 {
               }
             }
 
-            if (d3.nonEmpty){
-              val vTitle= d3.getString("title")
+            if (d3.nonEmpty) {
+              val vTitle = d3.getString("title")
               val score = Similarity.conceptSimilarity(title, vTitle)
               featureMap.put("ctitle_vtitle_similarity", score);
             }