Parcourir la source

添加6-7小时训练样本

jch il y a 6 jours
Parent
commit
c4a130a2c5

+ 1 - 1
ad/25_xgb_make_data_origin_bucket.sh

@@ -135,7 +135,7 @@ make_bucket_feature_from_origin_to_hive() {
   --conf spark.dynamicAllocation.maxExecutors=100 \
   ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
   beginStr:${today_early_1} endStr:${today_early_1} \
-  filterHours:${FILTER_HOURS:-00,01,02,03,04,05,06,07} \
+  filterHours:${FILTER_HOURS:-00,01,02,03,04,05} \
   filterAdverIds:${FILTER_ADVER_IDS} \
   filterNames:_4h_,_5h_,adid_,targeting_conversion_ \
   outputTable:${outputTable1} \

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketDataFromOriginToHive_20250522.scala

@@ -115,7 +115,7 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250522 {
     // 3 循环执行数据生产
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
     for (dt <- dateRange) {
-      val timeRange = MyDateUtils.getDateHourRange(dt + "08", dt + "23")
+      val timeRange = MyDateUtils.getDateHourRange(dt + "06", dt + "23")
       val recordRdd = timeRange.map { dt_hh =>
           val dt = dt_hh.substring(0, 8)
           val hh = dt_hh.substring(8, 10)