# ad_generate_train_test.py (1.0 KB)

  1. import datetime
  2. import pandas as pd
  3. import os
  4. if __name__ == '__main__':
  5. now_date = datetime.datetime.today()
  6. dt = datetime.datetime.strftime(now_date, '%Y%m%d')
  7. train_test_data_dir = './data/train_test_data'
  8. if not os.path.exists(train_test_data_dir):
  9. os.makedirs(train_test_data_dir)
  10. # 训练集
  11. data_df_list = []
  12. for days in range(4, 19):
  13. cur_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=days), '%Y%m%d')
  14. print(f"cur_dt = {cur_dt}")
  15. cur_dt_df = pd.read_csv(f"./data/sample_train_data/{cur_dt}.csv")
  16. data_df_list.append(cur_dt_df)
  17. all_df = pd.concat(data_df_list)
  18. print(f"all data num: {all_df.shape[0]}")
  19. all_df.to_csv(f'{train_test_data_dir}/train_{dt}.csv', index=False)
  20. # 测试集
  21. test_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=3), '%Y%m%d')
  22. test_df = pd.read_csv(f"./data/sample_train_data/{test_dt}.csv")
  23. print(f"test data num: {test_df.shape[0]}")
  24. test_df.to_csv(f'{train_test_data_dir}/test_{dt}.csv', index=False)