|
@@ -17,7 +17,7 @@ import scala.collection.mutable
|
|
|
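Note: for every constructed feature, a raw value of 0.0 is treated as meaningless and is not kept; if a "change" transformation yields 0.0, that value is kept.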
|
|
|
*/
|
|
|
|
|
|
-object makedata_06_originData_v2 {
|
|
|
+object makedata_06_originData_v3 {
|
|
|
def main(args: Array[String]) {
|
|
|
val spark = SparkSession
|
|
|
.builder()
|
|
@@ -31,9 +31,9 @@ object makedata_06_originData_v2 {
|
|
|
val partitionPrefix = param.getOrElse("partitionPrefix", "dt=")
|
|
|
val beginStr = param.getOrElse("beginStr", "20230101")
|
|
|
val endStr = param.getOrElse("endStr", "20230101")
|
|
|
- val savePath = param.getOrElse("savePath", "/dw/recommend/model/00_sample_data/")
|
|
|
+ val savePath = param.getOrElse("savePath", "/dw/recommend/model/00_sample_data_v3/")
|
|
|
val project = param.getOrElse("project", "loghubods")
|
|
|
- val table = param.getOrElse("table", "alg_recsys_view_sample_v2")
|
|
|
+ val table = param.getOrElse("table", "alg_recsys_view_sample_v3")
|
|
|
|
|
|
|
|
|
|
|
@@ -65,7 +65,8 @@ object makedata_06_originData_v2 {
|
|
|
"i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
|
|
|
"i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt",
|
|
|
"i_7day_exp_cnt", "i_7day_click_cnt", "i_7day_share_cnt", "i_7day_return_cnt",
|
|
|
- "i_3month_exp_cnt", "i_3month_click_cnt", "i_3month_share_cnt", "i_3month_return_cnt"
|
|
|
+ "i_3month_exp_cnt", "i_3month_click_cnt", "i_3month_share_cnt", "i_3month_return_cnt",
|
|
|
+ "video_recommend"
|
|
|
)
|
|
|
val originFeatureMap = getFeatureFromSet(originFeatureName, record)
|
|
|
|
|
@@ -97,7 +98,7 @@ object makedata_06_originData_v2 {
|
|
|
"apptype", "logtimestamp", "clientip", "ctx_day", "ctx_week", "ctx_hour", "ctx_region", "ctx_city",
|
|
|
"gender", "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_sdkversion",
|
|
|
"machineinfo_system", "machineinfo_wechatversion", "gmt_create_user",
|
|
|
- "title", "tags"
|
|
|
+ "title", "tags", "video_recommend"
|
|
|
), record)
|
|
|
val f2 = RankExtractorUserFeature.getUserRateFeature(originFeatureMap)
|
|
|
val f3 = RankExtractorUserFeature.cntFeatureChange(originFeatureMap,
|
|
@@ -157,7 +158,7 @@ object makedata_06_originData_v2 {
|
|
|
"u_7day_exp_cnt", "u_7day_click_cnt", "u_7day_share_cnt", "u_7day_return_cnt",
|
|
|
"u_3month_exp_cnt", "u_3month_click_cnt", "u_3month_share_cnt", "u_3month_return_cnt",
|
|
|
|
|
|
- "title", "tags", "total_time", "play_count_total",
|
|
|
+ "title", "tags", "total_time", "play_count_total", "video_recommend",
|
|
|
"i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
|
|
|
"i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt",
|
|
|
"i_7day_exp_cnt", "i_7day_click_cnt", "i_7day_share_cnt", "i_7day_return_cnt",
|
|
@@ -199,9 +200,11 @@ object makedata_06_originData_v2 {
|
|
|
})
|
|
|
|
|
|
val labels = Set(
|
|
|
- "is_share", "is_return", "playtime",
|
|
|
- "is_play",
|
|
|
- "share_ts", "share_ts_list", "return_mid_ts_list"
|
|
|
+ "pagesource", "recommend_page_type", "pagesource_change",
|
|
|
+ "abcode",
|
|
|
+ "is_play", "playtime",
|
|
|
+ "is_share", "share_cnt_pv", "share_ts_list",
|
|
|
+ "is_return", "return_cnt_pv", "return_cnt_uv", "return_mid_ts_list"
|
|
|
)
|
|
|
val labelNew = new JSONObject
|
|
|
val labelMap = getFeatureFromSet(labels, record)
|
|
@@ -215,8 +218,9 @@ object makedata_06_originData_v2 {
|
|
|
val videoid = record.getString("videoid")
|
|
|
val logtimestamp = record.getString("logtimestamp")
|
|
|
val sessionid = record.getString("sessionid")
|
|
|
+ val apptype = record.getString("apptype")
|
|
|
|
|
|
- val logKey = (mid, videoid, logtimestamp, sessionid).productIterator.mkString(":")
|
|
|
+ val logKey = (mid, videoid, logtimestamp, sessionid, apptype).productIterator.mkString(":")
|
|
|
val labelKey = labelNew.toString()
|
|
|
val featureKey = resultNew.toString()
|
|
|
|