Browse Source

增加onlinefeature训练0116

sunmingze 1 year ago
parent
commit
dcd52be30b

+ 6 - 1
src/main/java/examples/sparksql/SparkAdCTRSampleTester.java

@@ -49,7 +49,12 @@ public class SparkAdCTRSampleTester {
     public static String singleParse2(Record record) {
         // 数据解析
         String label = record.getString("adclick_ornot");
-        String samples = record.getString("lrsample");
+        // Flip the raw flag: a null or "1" value is emitted as "0", any other value as "1".
+        label = (label == null || label.equals("1")) ? "0" : "1";
+        // Replace every two-character sequence (backslash followed by 't') found in the
+        // lrsample value with a real tab character; plain literal replacement, no regex.
+        String rawSample = record.getString("lrsample");
+        String samples = rawSample.replace("\\t", "\t");
         return label + "\t" +  samples;
     }
 

+ 7 - 1
src/main/java/examples/sparksql/SparkAdCVRSampleTester.java

@@ -32,6 +32,7 @@ public class SparkAdCVRSampleTester {
         JavaRDD<Record> readData = odpsOps.readTableWithJava(project, table, partition, new RecordsToSamples(), Integer.valueOf(30));
         readData.filter(row -> row.get("type") != null)
                 .filter(row -> row.get("lrsample") != null)
+                .filter(row -> row.getString("adclick_ornot").equals("0"))
                 .map(line -> singleParse(line))
                 .saveAsTextFile(hdfsPath);
     }
@@ -47,7 +48,12 @@ public class SparkAdCVRSampleTester {
     public static String singleParse(Record record) {
         // 数据解析
         String label = record.getString("adinvert_ornot");
-        String samples = record.getString("lrsample");
+        // Output label is the inverse of the raw column: null or "1" -> "0", otherwise "1".
+        boolean emitZero = (label == null) || label.equals("1");
+        label = emitZero ? "0" : "1";
+        // Literal (non-regex) substitution of each backslash + 't' pair with a tab character.
+        String lrSample = record.getString("lrsample");
+        String samples = lrSample.replace("\\t", "\t");
         return label + "\t" + samples;
     }
 }