|
@@ -9,6 +9,7 @@ import examples.utils.SimilarityUtils
|
|
|
import org.apache.hadoop.io.compress.GzipCodec
|
|
|
import org.apache.spark.sql.SparkSession
|
|
|
|
|
|
+import java.util.Calendar
|
|
|
import scala.collection.JavaConversions._
|
|
|
import scala.collection.mutable.ArrayBuffer
|
|
|
import scala.util.Random
|
|
@@ -277,6 +278,16 @@ object makedata_recsys_72_originData_20250109 {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ // time
|
|
|
+ val ts = record.getString("ts")
|
|
|
+ val calendar = tsToCalendar(ts)
|
|
|
+ if (null != calendar) {
|
|
|
+ val week = calendar.get(Calendar.DAY_OF_WEEK)
|
|
|
+ val hour = calendar.get(Calendar.HOUR_OF_DAY) + 1
|
|
|
+ featureMap.put("week", week)
|
|
|
+ featureMap.put("hour", hour)
|
|
|
+ }
|
|
|
+
|
|
|
/*
|
|
|
视频特征: 5*6*5 = 240个
|
|
|
曝光使用pv 分享使用pv 回流使用uv --> 1h 2h 3h 4h 12h 1d 3d 7d
|
|
@@ -316,7 +327,6 @@ object makedata_recsys_72_originData_20250109 {
|
|
|
val abcode = record.getString("abcode")
|
|
|
val mid = record.getString("mid")
|
|
|
val level = getStringValue(record, "level", "0")
|
|
|
- val ts = record.getString("ts")
|
|
|
val logKey = (apptype, page, pagesource, recommendpagetype, flowpool, abcode, mid, vid, level, ts).productIterator.mkString(",")
|
|
|
val labelKey = labels.toString()
|
|
|
val featureKey = truncateDecimal(featureMap).toString()
|
|
@@ -467,4 +477,15 @@ object makedata_recsys_72_originData_20250109 {
|
|
|
}
|
|
|
default
|
|
|
}
|
|
|
+
|
|
|
/**
 * Converts an epoch-seconds timestamp string into a [[java.util.Calendar]].
 *
 * The returned calendar is created with `Calendar.getInstance`, i.e. it uses
 * the JVM's default time zone and locale, so fields such as `HOUR_OF_DAY`
 * read from it depend on the time zone of the process.
 *
 * @param ts timestamp in whole seconds since the Unix epoch, as a decimal string
 * @return a calendar positioned at `ts`, or `null` when `ts` is null, is not a
 *         valid long, or lies outside the accepted window (both bounds
 *         exclusive). `null` is kept as the "no value" signal because existing
 *         callers test the result with `null != calendar`.
 */
def tsToCalendar(ts: String): Calendar = {
  // Accepted window, expressed in seconds. These are the original millisecond
  // bounds (1107658247000L, 2685495047000L) divided by 1000; comparing in
  // seconds is equivalent for whole-second inputs and avoids the overflow that
  // `seconds * 1000L` could hit on absurdly large values before the check.
  val minSecondsExclusive = 1107658247L
  val maxSecondsExclusive = 2685495047L

  // A malformed or missing timestamp must not abort processing of the record:
  // treat it exactly like an out-of-range timestamp.
  val validSeconds: Option[Long] =
    scala.util.Try(ts.toLong).toOption
      .filter(s => s > minSecondsExclusive && s < maxSecondsExclusive)

  validSeconds match {
    case Some(seconds) =>
      val calendar = Calendar.getInstance
      calendar.setTimeInMillis(seconds * 1000L)
      calendar
    case None =>
      null
  }
}
|
|
|
}
|