|
@@ -38,7 +38,8 @@ public class SparkAdCTRSampleTester {
|
|
JavaRDD<Record> readData = odpsOps.readTableWithJava(project, table, partition, new RecordsToSamples(), Integer.valueOf(30));
|
|
JavaRDD<Record> readData = odpsOps.readTableWithJava(project, table, partition, new RecordsToSamples(), Integer.valueOf(30));
|
|
readData.filter(row -> row.get("type") != null)
|
|
readData.filter(row -> row.get("type") != null)
|
|
.filter(row -> row.get("lrsample") != null)
|
|
.filter(row -> row.get("lrsample") != null)
|
|
- .map(line -> singleParse(line))
|
|
|
|
|
|
+ .filter(row -> row.get("type").equals("VlogAdCtrLRScorer") )
|
|
|
|
+ .map(line -> singleParse2(line))
|
|
.saveAsTextFile(hdfsPath);
|
|
.saveAsTextFile(hdfsPath);
|
|
}
|
|
}
|
|
|
|
|
|
@@ -54,7 +55,7 @@ public class SparkAdCTRSampleTester {
|
|
// 单条日志处理逻辑
|
|
// 单条日志处理逻辑
|
|
public static String singleParse(Record record) {
|
|
public static String singleParse(Record record) {
|
|
// 数据解析
|
|
// 数据解析
|
|
- String label = record.getString("pctr");
|
|
|
|
|
|
+ String label = record.getString("adclick_ornot");
|
|
|
|
|
|
// 从sql的 record中 初始化对象内容
|
|
// 从sql的 record中 初始化对象内容
|
|
AdRequestContext requestContext = AdSampleConstructor.constructRequestContext(record);
|
|
AdRequestContext requestContext = AdSampleConstructor.constructRequestContext(record);
|
|
@@ -75,6 +76,16 @@ public class SparkAdCTRSampleTester {
|
|
return parseSamplesToString2(label, lrSamples);
|
|
return parseSamplesToString2(label, lrSamples);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ // Per-record formatter: returns the record's "adclick_ornot" string, a tab character, and the record's "lrsample" string as one line; the values are concatenated as-is with no parsing or validation, so a null returned by getString would appear as the text "null".
|
|
|
|
+ public static String singleParse2(Record record) {
|
|
|
|
+ // Read the two string columns used to build the line: "adclick_ornot" becomes the label, "lrsample" the sample payload.
|
|
|
|
+ String label = record.getString("adclick_ornot");
|
|
|
|
+ String samples = record.getString("lrsample");
|
|
|
|
+
|
|
|
|
+ return label + "\t" + samples;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
|
|
// 构建样本的字符串
|
|
// 构建样本的字符串
|
|
public static String parseSamplesToString2(String label, LRSamples lrSamples) {
|
|
public static String parseSamplesToString2(String label, LRSamples lrSamples) {
|