|
@@ -9,6 +9,7 @@ import examples.utils.SimilarityUtils
|
|
|
import org.apache.hadoop.io.compress.GzipCodec
|
|
|
import org.apache.spark.sql.SparkSession
|
|
|
|
|
|
+import java.util.Calendar
|
|
|
import scala.collection.JavaConversions._
|
|
|
import scala.collection.mutable.ArrayBuffer
|
|
|
import scala.util.Random
|
|
@@ -270,6 +271,16 @@ object makedata_recsys_61_originData_20241209 {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ // time
|
|
|
+ val ts = record.getString("ts")
|
|
|
+ val calendar = tsToCalendar(ts)
|
|
|
+ if (null != calendar) {
|
|
|
+ val week = calendar.get(Calendar.DAY_OF_WEEK)
|
|
|
+ val hour = calendar.get(Calendar.HOUR_OF_DAY) + 1
|
|
|
+ featureMap.put("week", week)
|
|
|
+ featureMap.put("hour", hour)
|
|
|
+ }
|
|
|
+
|
|
|
/*
|
|
|
视频特征: 5*6*5 = 240个
|
|
|
曝光使用pv 分享使用pv 回流使用uv --> 1h 2h 3h 4h 12h 1d 3d 7d
|
|
@@ -311,7 +322,6 @@ object makedata_recsys_61_originData_20241209 {
|
|
|
val pagesource = record.getString("pagesource")
|
|
|
val mid = record.getString("mid")
|
|
|
// vid 已经提取了
|
|
|
- val ts = record.getString("ts")
|
|
|
val abcode = record.getString("abcode")
|
|
|
val level = if (record.isNull("level")) "0" else record.getString("level")
|
|
|
val logKey = (apptype, pagesource, mid, vid, ts, abcode, level).productIterator.mkString(",")
|
|
@@ -420,4 +430,15 @@ object makedata_recsys_61_originData_20241209 {
|
|
|
}
|
|
|
new JSONObject()
|
|
|
}
|
|
|
+
|
|
|
/**
 * Converts a Unix timestamp expressed in seconds (as a decimal string) into a
 * [[java.util.Calendar]].
 *
 * The calendar is created with `Calendar.getInstance`, i.e. in the JVM's default
 * time zone and locale, so fields such as `HOUR_OF_DAY` read from the result depend
 * on the runtime's time-zone configuration.
 *
 * @param ts epoch seconds as a string; may be null or malformed
 * @return a calendar positioned at `ts`, or `null` when `ts` is null, is not a valid
 *         long, or falls outside the accepted window (exclusive bounds
 *         1107658247 .. 2685495047 epoch seconds, approximately Feb 2005 .. Feb 2055).
 *         `null` (rather than `Option`) is kept because existing callers test
 *         `null != calendar`.
 */
def tsToCalendar(ts: String): Calendar = {
  // Same window as the original millisecond bounds (1107658247000L / 2685495047000L),
  // expressed in seconds so the check happens BEFORE multiplying by 1000 and a huge
  // input cannot overflow into the accepted range.
  val minSecondsExclusive = 1107658247L
  val maxSecondsExclusive = 2685495047L

  // A null or non-numeric timestamp is treated as "no time information" instead of
  // throwing NumberFormatException and failing the whole record/task.
  val seconds: Option[Long] =
    if (ts == null) None
    else {
      try Some(ts.toLong)
      catch { case _: NumberFormatException => None }
    }

  seconds.filter(s => s > minSecondsExclusive && s < maxSecondsExclusive) match {
    case Some(s) =>
      val calendar = Calendar.getInstance
      calendar.setTimeInMillis(s * 1000L)
      calendar
    case None =>
      null
  }
}
|
|
|
}
|