1234567891011121314151617181920212223242526 |
import datetime
import os

import pandas as pd
def build_train_test_sets(
    now_date=None,
    sample_data_dir='./data/sample_train_data',
    train_test_data_dir='./data/train_test_data',
    train_days=range(3, 9),
    test_day=2,
):
    """Assemble the train and test CSVs from per-day sample files.

    Each daily file is read from ``{sample_data_dir}/{YYYYMMDD}.csv``. The
    training set is the concatenation of the files for every offset in
    ``train_days`` (days before ``now_date``); the test set is the single
    file ``test_day`` days before ``now_date``. Both outputs are written to
    ``train_test_data_dir`` and are stamped with ``now_date`` itself.

    Args:
        now_date: ``datetime.datetime`` used as the reference day. Defaults
            to ``datetime.datetime.today()`` at call time.
        sample_data_dir: Directory holding the per-day sample CSV files.
        train_test_data_dir: Output directory; created if it is missing.
        train_days: Iterable of day offsets (counted back from ``now_date``)
            whose files make up the training set.
        test_day: Day offset (counted back from ``now_date``) of the file
            used as the test set.

    Returns:
        A ``(train_path, test_path)`` tuple with the paths of the two CSV
        files that were written.

    Raises:
        FileNotFoundError: If one of the expected daily CSV files is missing.
    """
    if now_date is None:
        now_date = datetime.datetime.today()
    dt = now_date.strftime('%Y%m%d')

    # exist_ok=True replaces the exists()/makedirs() pair, which could fail
    # if the directory appeared between the check and the creation.
    os.makedirs(train_test_data_dir, exist_ok=True)

    # Training set: one CSV per requested day offset, stacked row-wise.
    data_df_list = []
    for days in train_days:
        cur_dt = (now_date - datetime.timedelta(days=days)).strftime('%Y%m%d')
        print(f"cur_dt = {cur_dt}")
        data_df_list.append(pd.read_csv(f"{sample_data_dir}/{cur_dt}.csv"))
    all_df = pd.concat(data_df_list)
    print(f"all data num: {all_df.shape[0]}")
    train_path = f'{train_test_data_dir}/train_{dt}.csv'
    all_df.to_csv(train_path, index=False)

    # Test set: a single day that is not part of the default training window.
    test_dt = (now_date - datetime.timedelta(days=test_day)).strftime('%Y%m%d')
    test_df = pd.read_csv(f"{sample_data_dir}/{test_dt}.csv")
    print(f"test data num: {test_df.shape[0]}")
    test_path = f'{train_test_data_dir}/test_{dt}.csv'
    test_df.to_csv(test_path, index=False)

    return train_path, test_path


if __name__ == '__main__':
    build_train_test_sets()
|