|
@@ -53,38 +53,33 @@ object makedata_06_strData {
|
|
|
numPartition = tablePart)
|
|
|
.map(record => {
|
|
|
|
|
|
- val originSecene = Set(
|
|
|
- "apptype", "logtimestamp", "clientip", "ctx_day", "ctx_week", "ctx_hour", "ctx_region", "ctx_city"
|
|
|
- )
|
|
|
- val originUser = Set(
|
|
|
+ val originFeatureName = Set(
|
|
|
+ "apptype", "logtimestamp", "clientip", "ctx_day", "ctx_week", "ctx_hour", "ctx_region", "ctx_city",
|
|
|
+
|
|
|
"gender", "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_sdkversion",
|
|
|
"machineinfo_system", "machineinfo_wechatversion", "gmt_create_user",
|
|
|
"u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
|
|
|
"u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt",
|
|
|
"u_7day_exp_cnt", "u_7day_click_cnt", "u_7day_share_cnt", "u_7day_return_cnt",
|
|
|
- "u_3month_exp_cnt", "u_3month_click_cnt", "u_3month_share_cnt", "u_3month_return_cnt"
|
|
|
- )
|
|
|
- val originItem = Set(
|
|
|
+ "u_3month_exp_cnt", "u_3month_click_cnt", "u_3month_share_cnt", "u_3month_return_cnt",
|
|
|
+
|
|
|
"title", "tags", "total_time", "play_count_total",
|
|
|
"i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
|
|
|
"i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt",
|
|
|
"i_7day_exp_cnt", "i_7day_click_cnt", "i_7day_share_cnt", "i_7day_return_cnt",
|
|
|
"i_3month_exp_cnt", "i_3month_click_cnt", "i_3month_share_cnt", "i_3month_return_cnt"
|
|
|
)
|
|
|
- val originItemRealtime = Set(
|
|
|
- "view_pv_list_1day","view_uv_list_1day","play_pv_list_1day","play_uv_list_1day",
|
|
|
- "share_pv_list_1day", "share_uv_list_1day","return_uv_list_1day",
|
|
|
- "p_view_uv_list_1day","p_view_pv_list_1day","p_return_uv_list_1day",
|
|
|
- "share_uv_list_2day","share_pv_list_2day","share_uv_list_3day","share_pv_list_3day",
|
|
|
+ val originFeatureMap = getFeatureFromSet(originFeatureName, record)
|
|
|
|
|
|
- "view_uv_list_1h","view_pv_list_1h","play_uv_list_1h","play_pv_list_1h",
|
|
|
- "share_uv_list_1h","share_pv_list_1h","return_uv_list_1h","p_return_uv_list_1h"
|
|
|
- )
|
|
|
+ val itemRealtimeFeatureMap = getFeatureFromSet(Set(
|
|
|
+ "view_pv_list_1day", "view_uv_list_1day", "play_pv_list_1day", "play_uv_list_1day",
|
|
|
+ "share_pv_list_1day", "share_uv_list_1day", "return_uv_list_1day",
|
|
|
+ "p_view_uv_list_1day", "p_view_pv_list_1day", "p_return_uv_list_1day",
|
|
|
+ "share_uv_list_2day", "share_pv_list_2day", "share_uv_list_3day", "share_pv_list_3day",
|
|
|
|
|
|
- val sceneFeatureMap = getFeatureFromSet(originSecene, record)
|
|
|
- val userFeatureMap = getFeatureFromSet(originUser, record)
|
|
|
- val itemFeatureMap = getFeatureFromSet(originItem, record)
|
|
|
- val itemRealtimeFeatureMap = getFeatureFromSet(originItemRealtime, record).map(r => {
|
|
|
+ "view_uv_list_1h", "view_pv_list_1h", "play_uv_list_1h", "play_pv_list_1h",
|
|
|
+ "share_uv_list_1h", "share_pv_list_1h", "return_uv_list_1h", "p_return_uv_list_1h"
|
|
|
+ ), record).map(r => {
|
|
|
val m = new java.util.HashMap[String, Double]()
|
|
|
r._2.split(",").foreach(r => {
|
|
|
m.put(r.split(":")(0), r.split(":")(1).toDouble)
|
|
@@ -106,16 +101,16 @@ object makedata_06_strData {
|
|
|
"machineinfo_system", "machineinfo_wechatversion", "gmt_create_user",
|
|
|
"title", "tags"
|
|
|
), record)
|
|
|
- val f2 = RankExtractorUserFeature.getUserRateFeature(userFeatureMap)
|
|
|
- val f3 = RankExtractorUserFeature.cntFeatureChange(userFeatureMap,
|
|
|
+ val f2 = RankExtractorUserFeature.getUserRateFeature(originFeatureMap)
|
|
|
+ val f3 = RankExtractorUserFeature.cntFeatureChange(originFeatureMap,
|
|
|
new util.HashSet[String](util.Arrays.asList(
|
|
|
"u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
|
|
|
"u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt",
|
|
|
"u_7day_exp_cnt", "u_7day_click_cnt", "u_7day_share_cnt", "u_7day_return_cnt",
|
|
|
"u_3month_exp_cnt", "u_3month_click_cnt", "u_3month_share_cnt", "u_3month_return_cnt"))
|
|
|
)
|
|
|
- val f4 = RankExtractorItemFeature.getItemRateFeature(itemFeatureMap)
|
|
|
- val f5 = RankExtractorItemFeature.cntFeatureChange(itemFeatureMap,
|
|
|
+ val f4 = RankExtractorItemFeature.getItemRateFeature(originFeatureMap)
|
|
|
+ val f5 = RankExtractorItemFeature.cntFeatureChange(originFeatureMap,
|
|
|
new util.HashSet[String](util.Arrays.asList(
|
|
|
"total_time", "play_count_total",
|
|
|
"i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
|
|
@@ -123,7 +118,8 @@ object makedata_06_strData {
|
|
|
"i_7day_exp_cnt", "i_7day_click_cnt", "i_7day_share_cnt", "i_7day_return_cnt",
|
|
|
"i_3month_exp_cnt", "i_3month_click_cnt", "i_3month_share_cnt", "i_3month_return_cnt")))
|
|
|
val f6 = RankExtractorItemFeature.getItemRealtimeTrend(javaMap,
|
|
|
- sceneFeatureMap.getOrElse("ctx_day", ""), sceneFeatureMap.getOrElse("ctx_hour", ""))
|
|
|
+ originFeatureMap.getOrElse("ctx_day", ""),
|
|
|
+ originFeatureMap.getOrElse("ctx_hour", ""))
|
|
|
val f7 = RankExtractorItemFeature.getItemRealtimeCnt(javaMap,
|
|
|
new util.HashSet[String](util.Arrays.asList(
|
|
|
"view_pv_list_1day", "view_uv_list_1day", "play_pv_list_1day", "play_uv_list_1day",
|
|
@@ -134,12 +130,12 @@ object makedata_06_strData {
|
|
|
"view_uv_list_1h", "view_pv_list_1h", "play_uv_list_1h", "play_pv_list_1h",
|
|
|
"share_uv_list_1h", "share_pv_list_1h", "return_uv_list_1h", "p_return_uv_list_1h"
|
|
|
)),
|
|
|
- sceneFeatureMap.getOrElse("ctx_day", ""),
|
|
|
- sceneFeatureMap.getOrElse("ctx_hour", "")
|
|
|
+ originFeatureMap.getOrElse("ctx_day", ""),
|
|
|
+ originFeatureMap.getOrElse("ctx_hour", "")
|
|
|
)
|
|
|
val f8 = RankExtractorItemFeature.getItemRealtimeRate(javaMap,
|
|
|
- sceneFeatureMap.getOrElse("ctx_day", ""),
|
|
|
- sceneFeatureMap.getOrElse("ctx_hour", "")
|
|
|
+ originFeatureMap.getOrElse("ctx_day", ""),
|
|
|
+ originFeatureMap.getOrElse("ctx_hour", "")
|
|
|
)
|
|
|
|
|
|
// 1:特征聚合到map中
|
|
@@ -199,7 +195,7 @@ object makedata_06_strData {
|
|
|
val labelMap = getFeatureFromSet(labels, record)
|
|
|
labels.foreach(r => {
|
|
|
if (labelMap.containsKey(r)) {
|
|
|
- labelMap.put(r, labelMap.get(r).get)
|
|
|
+ labelNew.put(r, labelMap.get(r).get)
|
|
|
}
|
|
|
})
|
|
|
//3:记录唯一key
|
|
@@ -208,7 +204,7 @@ object makedata_06_strData {
|
|
|
val logtimestamp = record.getString("logtimestamp")
|
|
|
val sessionid = record.getString("sessionid")
|
|
|
|
|
|
- val logKey = (mid, videoid, logtimestamp, sessionid).productIterator.mkString("-")
|
|
|
+ val logKey = (mid, videoid, logtimestamp, sessionid).productIterator.mkString(":")
|
|
|
val labelKey = labelNew.toString()
|
|
|
val featureKey = resultNew.toString()
|
|
|
|