|
@@ -229,19 +229,42 @@ def fetch_data(label_datetime: datetime, feature_start_datetime: datetime, view_
|
|
|
return df
|
|
|
|
|
|
|
|
|
def xgb_train_multi_dt_data(t_1_label_dt: datetime):
    """
    Build the multi-day training data for the XGB model.

    Three label days are processed: ``t_1_label_dt`` itself and the two days
    before it. For each label day the feature day is the previous day and the
    view-rate day is the label day. The three ``fetch_data`` calls are
    submitted to a three-worker thread pool.

    :param t_1_label_dt: most recent label day; the two earlier label days
        are derived from it.
    :return: the three ``fetch_data`` results concatenated in the order
        t_1, t_2, t_3 with a fresh index (``ignore_index=True``).
    """
    train_days = 3
    futures = []
    with concurrent.futures.ThreadPoolExecutor(train_days) as executor:
        for offset in range(train_days):
            label_dt = t_1_label_dt - timedelta(offset)
            feature_dt = label_dt - timedelta(1)
            label_day = label_dt.strftime('%Y%m%d')
            # Adjacent f-string fragments are joined with no separator, so
            # each field carries its own ", " to keep the dates readable.
            logger.info(
                f"VOV模型特征数据处理 --- t_{offset + 1}_label_future: "
                f"label_datetime: {label_day}, "
                f"feature_datetime: {feature_dt.strftime('%Y%m%d')}, "
                f"view_rate_datetime: {label_day}"
            )
            futures.append(
                executor.submit(fetch_data, label_dt, feature_dt, label_dt)
            )

        # Collect in submission order so the concatenation stays t_1, t_2, t_3.
        day_frames = [future.result() for future in futures]

    return pd.concat(day_frames, ignore_index=True)
|
|
|
|
|
|
|
|
|
def xgb_predict_dt_data(label_datetime: datetime):
    """
    Fetch the data used for prediction.

    The feature day and the view-rate day are both set to ``label_datetime``.

    :param label_datetime: day passed to ``fetch_data`` as the label,
        feature-start and view-rate datetime.
    :return: whatever ``fetch_data`` returns for that day.
    """
    feature_start_datetime = label_datetime
    label_day = label_datetime.strftime('%Y%m%d')
    # Adjacent f-string fragments are joined with no separator, so each
    # field carries its own ", " to keep the dates readable in the log.
    logger.info(
        f"VOV模型预测数据处理 --- predict_df: "
        f"label_datetime: {label_day}, "
        f"feature_datetime: {feature_start_datetime.strftime('%Y%m%d')}, "
        f"view_rate_datetime: {label_day}"
    )
    return fetch_data(label_datetime, feature_start_datetime, label_datetime)
|
|
|
+
|
|
|
+
|
|
|
def _main():
|
|
|
logger.info(f"XGB模型训练")
|
|
|
- train_df = xgb_multi_dt_data((datetime.now() - timedelta(days=1)))
|
|
|
+ train_df = xgb_train_multi_dt_data((datetime.now() - timedelta(days=1)))
|
|
|
trains_array = train_df[features_name].values
|
|
|
trains_label_array = train_df['label'].values
|
|
|
|
|
@@ -273,10 +312,7 @@ def _main():
|
|
|
model.fit(trains_array, trains_label_array)
|
|
|
|
|
|
logger.info("获取评测数据")
|
|
|
- start_label_datetime = datetime.now() - timedelta(days=1)
|
|
|
- feature_start_datetime = start_label_datetime
|
|
|
-
|
|
|
- predict_df = fetch_data(start_label_datetime, feature_start_datetime, start_label_datetime)
|
|
|
+ predict_df = xgb_predict_dt_data((datetime.now() - timedelta(days=1)))
|
|
|
tests_array = predict_df[features_name].values
|
|
|
y_pred = model.predict_proba(tests_array)[:, 1]
|
|
|
predict_df["y_pred"] = y_pred
|