check_data.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. # import pandas as pd
  2. #
  3. # old_date_train = f"/Users/zhao/Desktop/Code/Python/model_monitor/XGB/data/all/20241012_predict.csv"
  4. # new_date_train = f"/Users/zhao/Desktop/Code/Python/model_monitor/XGB/20241012_predict_1.csv"
  5. #
  6. # # Read the two CSV files
  7. # old_df = pd.read_csv(old_date_train)
  8. # new_df = pd.read_csv(new_date_train)
  9. #
  10. # if old_df.shape[0] != new_df.shape[0]:
  11. # print(f"新老训练数据集长度不一样 新数据集: {new_df.shape[0]}, 老数据集: {old_df.shape[0]}")
  12. #
  13. # old_df_col = old_df.columns
  14. # new_df_col = new_df.columns
  15. # if len(old_df_col) != len(new_df_col):
  16. # print(f"两个文件列数不一样 新文件: {new_df_col}, 老文件: {old_df_col}")
  17. #
  18. # for col in old_df_col:
  19. # if col not in new_df_col:
  20. # print(f"列 {col} 在老文件存在,新文件不存在")
  21. #
  22. # for col in new_df_col:
  23. # if col not in old_df_col:
  24. # print(f"列 {col} 在新文件存在,老文件不存在")
  25. #
  26. # old_df.set_index("vid", inplace=True)
  27. # new_df.set_index("vid", inplace=True)
  28. #
  29. # old_dict = old_df.to_dict(orient="index")
  30. # new_dict = new_df.to_dict(orient="index")
  31. #
  32. # for e in new_dict:
  33. # if e not in old_dict:
  34. # print(f"vid {e} 在新文件中存在,在老文件中不存在")
  35. # new_row = new_dict[e]
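  36. # old_row = old_dict[e]  # NOTE(review): no `continue` follows the missing-vid print above, so if re-enabled this lookup raises KeyError for any vid absent from old_dict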
  37. # for col in new_df_col:
  38. # if col in ['vid', '曝光占比', '分子', '分母', 'label']:
  39. # continue
  40. # if col not in old_row:
  41. # print(f"vid {e} 的列 {col} 在老文件中不存在")
  42. # continue
  43. # # if col in new_row:
  44. # # print(f"vid {e} 的列 {col} 在新文件中不存在")
  45. # # continue
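  46. # if old_row[col] != new_row[col]:  # NOTE(review): cells that are NaN (missing) in both files compare as different here because NaN != NaN; guard with pd.isna if re-enabled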
  47. # print(f"vid {e} 列 {col} 的值在新老文件不一样, 新文件的值: {new_row[col]}, 老文件的值: {old_row[col]}")
  48. #
  49. # # z_vid = set()
  50. # # with open("/Users/zhao/Desktop/Code/Python/rov-offline/write_redis/filtered_vid", "r") as f:
  51. # # for line in f:
  52. # # z_vid.add(line.replace("\n", ""))
  53. # #
  54. # # p_vid = set()
  55. # # with open("./filtered_vid.txt", "r") as f:
  56. # # for line in f:
  57. # # p_vid.add(line.replace("\n", ""))
  58. # #
  59. # # for e in z_vid:
  60. # # if e not in p_vid:
  61. # # print(f"VID: {e} 离线预测有,在线预测没有")
  62. # #
  63. # # for e in p_vid:
  64. # # if e not in z_vid:
  65. # # print(f"VID: {e} 在线预测有,离线预测没有")