sunmingze пре 1 година
родитељ
комит
8950aa2585
1 измењених фајлова са 13 додато и 2 уклоњено
  1. 13 2
      src/main/java/examples/sparksql/SparkAdCTRSampleTester.java

+ 13 - 2
src/main/java/examples/sparksql/SparkAdCTRSampleTester.java

@@ -38,7 +38,8 @@ public class SparkAdCTRSampleTester {
         JavaRDD<Record> readData = odpsOps.readTableWithJava(project, table, partition, new RecordsToSamples(), Integer.valueOf(30));
         readData.filter(row -> row.get("type") != null)
                 .filter(row -> row.get("lrsample") != null)
-                .map(line -> singleParse(line))
+                .filter(row -> row.get("type").equals("VlogAdCtrLRScorer") )
+                .map(line -> singleParse2(line))
                 .saveAsTextFile(hdfsPath);
     }
 
@@ -54,7 +55,7 @@ public class SparkAdCTRSampleTester {
     // 单条日志处理逻辑
     public static String singleParse(Record record) {
         // 数据解析
-        String label = record.getString("pctr");
+        String label = record.getString("adclick_ornot");
 
         // 从sql的 record中 初始化对象内容
         AdRequestContext requestContext = AdSampleConstructor.constructRequestContext(record);
@@ -75,6 +76,16 @@ public class SparkAdCTRSampleTester {
         return parseSamplesToString2(label, lrSamples);
     }
 
+    // Per-record log processing: returns the record's "adclick_ornot" value and its "lrsample" value joined by a tab.
+    public static String singleParse2(Record record) {
+        // Data parsing: read both fields from the record as strings ("adclick_ornot" is used as the label)
+        String label = record.getString("adclick_ornot");
+        String samples = record.getString("lrsample");
+
+        return label + "\t" +  samples;
+    }
+
+
 
     // 构建样本的字符串
     public static String parseSamplesToString2(String label, LRSamples lrSamples) {