# # Disabled ad-hoc check (every line below is commented out): loads an "old"
# # and a "new" prediction CSV and prints the differences between them in
# # row count, column set, and per-`vid` cell values.
# import pandas as pd
#
# old_date_train = f"/Users/zhao/Desktop/Code/Python/model_monitor/XGB/data/all/20241012_predict.csv"
# new_date_train = f"/Users/zhao/Desktop/Code/Python/model_monitor/XGB/20241012_predict_1.csv"
#
# # Read the two CSV files
# old_df = pd.read_csv(old_date_train)
# new_df = pd.read_csv(new_date_train)
#
# # Report a row-count mismatch between the two files.
# if old_df.shape[0] != new_df.shape[0]:
#     print(f"新老训练数据集长度不一样 新数据集: {new_df.shape[0]}, 老数据集: {old_df.shape[0]}")
#
# # Column labels are captured here, before set_index() below, so both
# # still contain "vid".
# old_df_col = old_df.columns
# new_df_col = new_df.columns
# if len(old_df_col) != len(new_df_col):
#     print(f"两个文件列数不一样 新文件: {new_df_col}, 老文件: {old_df_col}")
#
# # Report columns that exist in only one of the two files.
# for col in old_df_col:
#     if col not in new_df_col:
#         print(f"列 {col} 在老文件存在，新文件不存在")
#
# for col in new_df_col:
#     if col not in old_df_col:
#         print(f"列 {col} 在新文件存在，老文件不存在")
#
# # Key both frames by "vid" so rows can be matched across files.
# old_df.set_index("vid", inplace=True)
# new_df.set_index("vid", inplace=True)
#
# # {vid: {column: value}} for each file.
# old_dict = old_df.to_dict(orient="index")
# new_dict = new_df.to_dict(orient="index")
#
# # Walk every vid of the new file and compare it cell by cell with the old one.
# for e in new_dict:
#     if e not in old_dict:
#         print(f"vid {e} 在新文件中存在，在老文件中不存在")
#         # NOTE(review): there is no `continue` here, so `old_dict[e]` below
#         # raises KeyError for a vid that is missing from the old file.
#     new_row = new_dict[e]
#     old_row = old_dict[e]
#     for col in new_df_col:
#         # "vid" is now the index (not a row key); the other listed columns
#         # are deliberately left out of the comparison.
#         if col in ['vid', '曝光占比', '分子', '分母', 'label']:
#             continue
#         if col not in old_row:
#             print(f"vid {e} 的列 {col} 在老文件中不存在")
#             continue
#         # NOTE(review): the disabled check below tests `col in new_row`, but
#         # its message says the column is missing from the new file --
#         # presumably `not in` was intended; confirm before re-enabling.
#         # if col in new_row:
#         #     print(f"vid {e} 的列 {col} 在新文件中不存在")
#         #     continue
#         # NOTE(review): `!=` is True for NaN vs NaN, so a cell that is empty
#         # in both CSVs would be reported as different -- confirm whether
#         # that is intended.
#         if old_row[col] != new_row[col]:
#             print(f"vid {e} 列 {col} 的值在新老文件不一样, 新文件的值: {new_row[col]}, 老文件的值: {old_row[col]}")
#
# # # Doubly commented-out check: loads two newline-delimited vid files into
# # # sets and prints every vid that appears in only one of them.
# # z_vid = set()
# # with open("/Users/zhao/Desktop/Code/Python/rov-offline/write_redis/filtered_vid", "r") as f:
# #     for line in f:
# #         # Drop the newline so entries compare equal across the two files.
# #         z_vid.add(line.replace("\n", ""))
# #
# # p_vid = set()
# # with open("./filtered_vid.txt", "r") as f:
# #     for line in f:
# #         p_vid.add(line.replace("\n", ""))
# #
# # # Vids only in z_vid (the printed message labels these as offline-only).
# # for e in z_vid:
# #     if e not in p_vid:
# #         print(f"VID: {e} 离线预测有，在线预测没有")
# #
# # # Vids only in p_vid (the printed message labels these as online-only).
# # for e in p_vid:
# #     if e not in z_vid:
# #         print(f"VID: {e} 在线预测有，离线预测没有")