"""Compare day-level recall rules (v1 and v2) against an offline recall result.

The script reads a day-level CSV, ranks its rows inside each ``type`` window,
flags the rows that fall inside per-window top-N limits, collapses the flags to
one boolean per ``cid`` and left-joins them onto the recall result CSV.
"""
import numpy as np
import pandas as pd

from client import ODPSClient

recall_result = "/Users/zhao/Desktop/20241124_recall.csv"
day_oss = "/Users/zhao/Desktop/20241124_day_oss.csv"

# NOTE(review): nothing in this file uses ``odps``; it is kept because other
# code may import it or rely on the client being constructed at import time.
odps = ODPSClient.ODPSClient()

# Rank column -> metric column it is ranked on (descending, per ``type``).
# Insertion order is also the order in which the rank columns are added.
_V1_RANK_SOURCES = {
    "cpm_rank": "score",
    "view_rank": "view_rate",
}
_V2_RANK_SOURCES = {
    "cpm_rank": "score",
    "cpm_rank_2": "cpm",
    "view_rank": "view_rate",
}

# ``type`` value -> {rank column: largest rank that still counts as recalled}.
# A row is recalled when ANY of its rank columns is within the limit.
_V1_LIMITS = {
    "14d": {"cpm_rank": 30, "view_rank": 20},
    "3d": {"cpm_rank": 50, "view_rank": 30},
    "1d": {"cpm_rank": 80, "view_rank": 50},
}
_V2_LIMITS = {
    "14d": {"cpm_rank": 30, "cpm_rank_2": 20, "view_rank": 30},
    "3d": {"cpm_rank": 50, "cpm_rank_2": 30, "view_rank": 50},
    "1d": {"cpm_rank": 80, "cpm_rank_2": 50, "view_rank": 100},
}


def _flag_day_recall(df: pd.DataFrame, flag_column: str,
                     rank_sources: dict, limits: dict) -> pd.DataFrame:
    """Add the rank columns and the boolean ``flag_column`` to ``df`` in place.

    Args:
        df: Frame with a ``type`` column and every metric column named in
            ``rank_sources``.
        flag_column: Name of the boolean column to create.
        rank_sources: Mapping of rank column name to the metric it ranks.
        limits: Mapping of ``type`` value to per-rank-column top-N limits.

    Returns:
        The same ``df`` object, for chaining.
    """
    ranks = {}
    for rank_column, metric in rank_sources.items():
        rank = df.groupby("type")[metric].rank(method="first", ascending=False)
        ranks[rank_column] = rank
        # A missing metric (or missing ``type``) yields a NaN rank, which a
        # plain ``astype(int)`` cannot represent and would raise on; the
        # nullable Int64 dtype keeps it as <NA> and still writes whole
        # numbers to CSV.
        df[rank_column] = rank.astype("Int64")

    row_count = len(df)
    recalled = np.zeros(row_count, dtype=bool)
    for window, window_limits in limits.items():
        in_top = np.zeros(row_count, dtype=bool)
        for rank_column, limit in window_limits.items():
            # Compare on the float ranks: NaN <= limit is False, so rows
            # without a rank are never recalled.
            in_top |= (ranks[rank_column] <= limit).to_numpy()
        recalled |= (df["type"] == window).to_numpy() & in_top

    df[flag_column] = recalled
    return df


def _load_day_recall(flag_column: str, rank_sources: dict, limits: dict,
                     debug_csv: str) -> pd.DataFrame:
    """Read ``day_oss``, flag recalled rows and reduce to one row per ``cid``.

    The row-level frame (with rank and flag columns) is written to
    ``debug_csv`` before aggregation.

    Returns:
        Frame with columns ``cid`` and ``flag_column``; the flag is True when
        at least one row of that ``cid`` was recalled.
    """
    df = _flag_day_recall(pd.read_csv(day_oss), flag_column, rank_sources, limits)
    df.to_csv(debug_csv, index=False)
    # Group by cid; a cid is recalled if any of its rows is recalled.
    return df.groupby("cid", as_index=False).agg(
        **{flag_column: (flag_column, "any")}
    )


def read_day_recall_v2() -> pd.DataFrame:
    """Return per-``cid`` ``day_recall_v2`` flags (score, cpm and view_rate ranks)."""
    return _load_day_recall(
        "day_recall_v2", _V2_RANK_SOURCES, _V2_LIMITS, "/Users/zhao/Desktop/3.csv"
    )


def read_day_recall() -> pd.DataFrame:
    """Return per-``cid`` ``day_recall_v1`` flags (score and view_rate ranks)."""
    return _load_day_recall(
        "day_recall_v1", _V1_RANK_SOURCES, _V1_LIMITS, "/Users/zhao/Desktop/2.csv"
    )


def _main():
    """Join both day-recall flags onto the recall result, save and print it."""
    day_recall = read_day_recall()
    day_recall_v2 = read_day_recall_v2()

    recall = pd.read_csv(recall_result)
    recall["base_diff"] = recall["产品-17-前五组"] - recall["产品-35-前五组"]

    # Left joins keep every recall row; a cid that is absent from the
    # day-level file ends up with NaN (not False) in the flag columns.
    recall = (
        pd.merge(recall, day_recall, on="cid", how="left")
        .merge(day_recall_v2, on="cid", how="left")
    )
    recall.to_csv("/Users/zhao/Desktop/1.csv", index=False)
    print(recall)


if __name__ == '__main__':
    _main()