# ad_generate_train_test.py (1.1 KB)
  1. import datetime
  2. import pandas as pd
  3. import os
  4. if __name__ == '__main__':
  5. now_date = datetime.datetime.today()
  6. dt = datetime.datetime.strftime(now_date, '%Y%m%d')
  7. print(f"now_date: {dt}")
  8. train_test_data_dir = './data/train_test_data'
  9. if not os.path.exists(train_test_data_dir):
  10. os.makedirs(train_test_data_dir)
  11. # 训练集
  12. data_df_list = []
  13. for days in range(16, 23):
  14. cur_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=days), '%Y%m%d')
  15. print(f"cur_dt = {cur_dt}")
  16. cur_dt_df = pd.read_csv(f"./data/sample_train_data/{cur_dt}.csv")
  17. data_df_list.append(cur_dt_df)
  18. all_df = pd.concat(data_df_list)
  19. print(f"all data num: {all_df.shape[0]}")
  20. all_df.to_csv(f'{train_test_data_dir}/train_{dt}.csv', index=False)
  21. # 测试集
  22. test_dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=15), '%Y%m%d')
  23. print(f"test data dt: {test_dt}")
  24. test_df = pd.read_csv(f"./data/sample_train_data/{test_dt}.csv")
  25. print(f"test data num: {test_df.shape[0]}")
  26. test_df.to_csv(f'{train_test_data_dir}/test_{dt}.csv', index=False)