|
@@ -16,6 +16,7 @@ import org.apache.spark.api.java.JavaSparkContext;
|
|
|
import org.apache.spark.api.java.function.Function2;
|
|
|
|
|
|
import java.util.ArrayList;
|
|
|
+import java.util.List;
|
|
|
import java.util.Map;
|
|
|
|
|
|
|
|
@@ -82,7 +83,7 @@ public class SparkAdCTRSampleLoader {
|
|
|
bytesFeatureExtractor.getCrossFeature(adItemBytesFeature, adRequestContextBytesFeature, userBytesFeature);
|
|
|
|
|
|
ListMultimap<FeatureGroup, BaseFeature> featureMap = bytesFeatureExtractor.getFeatures();
|
|
|
- return parseSamplesToString(label, featureMap);
|
|
|
+ return parseSamplesToString2(label, featureMap);
|
|
|
}
|
|
|
|
|
|
// 构建样本的字符串
|
|
@@ -97,4 +98,24 @@ public class SparkAdCTRSampleLoader {
|
|
|
return label + "\t" + String.join("\t", featureList);
|
|
|
}
|
|
|
|
|
|
+
|
|
|
+    /**
+     * Builds a single tab-separated sample line from a label and its extracted features.
+     *
+     * <p>The line starts with {@code label}. Every feature then contributes one
+     * {@code <fea>:1} token, where {@code <fea>} is the value returned by
+     * {@code BaseFeature.getFea()}. Groups are visited in the iteration order of
+     * {@code featureMap.keySet()}, and the features of a group in the order of the
+     * list returned by {@code featureMap.get(group)}. When the map holds no
+     * features, the result is {@code label} followed by a single tab.
+     *
+     * @param label value written as the first column of the line
+     * @param featureMap features keyed by their group; must not be {@code null}
+     * @return {@code label}, a tab, and the tab-joined {@code <fea>:1} tokens
+     */
+    public static String parseSamplesToString2(String label, ListMultimap<FeatureGroup, BaseFeature> featureMap) {
+        List<String> featureList = new ArrayList<>();
+
+        // The map is not modified while iterating, so its key view can be walked
+        // directly without first copying it into a separate list.
+        for (FeatureGroup group : featureMap.keySet()) {
+            for (BaseFeature baseFeature : featureMap.get(group)) {
+                // Each feature is emitted with the constant value 1. String
+                // concatenation renders a null name as "null", which is the same
+                // text String.valueOf(Object) produces.
+                featureList.add(baseFeature.getFea() + ":1");
+            }
+        }
+
+        return label + "\t" + String.join("\t", featureList);
+    }
|
|
|
+
|
|
|
+
|
|
|
}
|