|
@@ -32,6 +32,7 @@ public class SparkAdCVRSampleTester {
|
|
|
JavaRDD<Record> readData = odpsOps.readTableWithJava(project, table, partition, new RecordsToSamples(), Integer.valueOf(30));
|
|
|
readData.filter(row -> row.get("type") != null)
|
|
|
.filter(row -> row.get("lrsample") != null)
|
|
|
+ .filter(row -> row.getString("adclick_ornot").equals("0"))
|
|
|
.map(line -> singleParse(line))
|
|
|
.saveAsTextFile(hdfsPath);
|
|
|
}
|
|
@@ -47,7 +48,12 @@ public class SparkAdCVRSampleTester {
|
|
|
public static String singleParse(Record record) {
|
|
|
// 数据解析
|
|
|
String label = record.getString("adinvert_ornot");
|
|
|
- String samples = record.getString("lrsample");
|
|
|
+ if (label == null || label.equals("1")) {
|
|
|
+ label = "0";
|
|
|
+ } else {
|
|
|
+ label = "1";
|
|
|
+ }
|
|
|
+ String samples = record.getString("lrsample").replaceAll("\\\\t","\t");
|
|
|
return label + "\t" + samples;
|
|
|
}
|
|
|
}
|