|  | @@ -1,131 +1,56 @@
 | 
	
		
			
				|  |  |  import argparse
 | 
	
		
			
				|  |  |  import gzip
 | 
	
		
			
				|  |  |  import os.path
 | 
	
		
			
				|  |  | -from collections import OrderedDict
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  import pandas as pd
 | 
	
		
			
				|  |  |  from hdfs import InsecureClient
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  client = InsecureClient("http://master-1-1.c-7f31a3eea195cb73.cn-hangzhou.emr.aliyuncs.com:9870", user="spark")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -SEGMENT_BASE_PATH = os.environ.get("SEGMENT_BASE_PATH", "/dw/recommend/model/36_score_calibration_file")
 | 
	
		
			
				|  |  | +SEGMENT_BASE_PATH = os.environ.get("SEGMENT_BASE_PATH", "/dw/recommend/model/36_model_attachment/score_calibration_file")
 | 
	
		
			
				|  |  |  PREDICT_CACHE_PATH = os.environ.get("PREDICT_CACHE_PATH", "/root/zhaohp/XGB/predict_cache")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -def read_predict_from_local_txt(txt_file) -> list:
 | 
	
		
			
				|  |  | -    result = []
 | 
	
		
			
				|  |  | -    with open(txt_file, "r") as f:
 | 
	
		
			
				|  |  | -        for line in f.readlines():
 | 
	
		
			
				|  |  | -            sp = line.replace("\n", "").split("\t")
 | 
	
		
			
				|  |  | -            if len(sp) == 4:
 | 
	
		
			
				|  |  | -                label = int(sp[0])
 | 
	
		
			
				|  |  | -                cid = sp[3].split("_")[0]
 | 
	
		
			
				|  |  | -                score = float(sp[2].replace("[", "").replace("]", "").split(",")[1])
 | 
	
		
			
				|  |  | -                result.append({
 | 
	
		
			
				|  |  | -                    "label": label,
 | 
	
		
			
				|  |  | -                    "cid": cid,
 | 
	
		
			
				|  |  | -                    "score": score
 | 
	
		
			
				|  |  | -                })
 | 
	
		
			
				|  |  | -    return result
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -def read_predict_from_hdfs(hdfs_path: str) -> list:
 | 
	
		
			
				|  |  | -    if not hdfs_path.endswith("/"):
 | 
	
		
			
				|  |  | -        hdfs_path += "/"
 | 
	
		
			
				|  |  | -    result = []
 | 
	
		
			
				|  |  | -    for file in client.list(hdfs_path):
 | 
	
		
			
				|  |  | -        with client.read(hdfs_path + file) as reader:
 | 
	
		
			
				|  |  | -            with gzip.GzipFile(fileobj=reader, mode="rb") as gz_file:
 | 
	
		
			
				|  |  | -                for line in gz_file.read().decode("utf-8").split("\n"):
 | 
	
		
			
				|  |  | -                    split = line.split("\t")
 | 
	
		
			
				|  |  | -                    if len(split) == 4:
 | 
	
		
			
				|  |  | -                        cid = split[3].split("_")[0]
 | 
	
		
			
				|  |  | -                        label = int(split[0])
 | 
	
		
			
				|  |  | -                        score = float(split[2].replace("[", "").replace("]", "").split(",")[1])
 | 
	
		
			
				|  |  | -                        result.append({
 | 
	
		
			
				|  |  | -                            "cid": cid,
 | 
	
		
			
				|  |  | -                            "label": label,
 | 
	
		
			
				|  |  | -                            "score": score
 | 
	
		
			
				|  |  | -                        })
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    return result
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -def _segment_v1(scores, step):
 | 
	
		
			
				|  |  | -    bins = []
 | 
	
		
			
				|  |  | -    for i in range(0, len(scores), int((len(scores) / step))):
 | 
	
		
			
				|  |  | -        if i == 0:
 | 
	
		
			
				|  |  | -            bins.append(0)
 | 
	
		
			
				|  |  | -        else:
 | 
	
		
			
				|  |  | -            bins.append(scores[i])
 | 
	
		
			
				|  |  | -    bins.append(1)
 | 
	
		
			
				|  |  | -    return list(OrderedDict.fromkeys(bins))
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -def segment_calc_diff_rate_by_score(df: pd.DataFrame, segment_file_path: str, step=100) -> [pd.DataFrame, pd.DataFrame]:
 | 
	
		
			
				|  |  | -    sored_df = df.sort_values(by=['score'])
 | 
	
		
			
				|  |  | -    # 评估分数分段
 | 
	
		
			
				|  |  | -    scores = sored_df['score'].values
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    bins = _segment_v1(scores, step)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    # 等分分桶
 | 
	
		
			
				|  |  | -    # split_indices = np.array_split(np.arange(len(scores)), step)
 | 
	
		
			
				|  |  | -    # bins = [scores[index[0]] for index in split_indices] + [scores[split_indices[-1][-1]]]
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    sored_df['score_segment'] = pd.cut(sored_df['score'], bins=bins)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    # 计算分段内分数的差异
 | 
	
		
			
				|  |  | -    group_df = sored_df.groupby("score_segment", observed=True).agg(
 | 
	
		
			
				|  |  | -        segment_label_sum=('label', 'sum'),
 | 
	
		
			
				|  |  | -        segment_label_cnt=('label', 'count'),
 | 
	
		
			
				|  |  | -        segment_score_avg=('score', 'mean'),
 | 
	
		
			
				|  |  | -    ).reset_index()
 | 
	
		
			
				|  |  | -    group_df['segment_true_score'] = group_df['segment_label_sum'] / group_df['segment_label_cnt']
 | 
	
		
			
				|  |  | -    group_df['segment_diff_rate'] = (group_df['segment_score_avg'] / group_df['segment_true_score'] - 1).mask(group_df['segment_true_score'] == 0, 0)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    # 完整的分段文件保存
 | 
	
		
			
				|  |  | -    csv_data = group_df.to_csv(sep="\t", index=False)
 | 
	
		
			
				|  |  | -    with client.write(segment_file_path, encoding='utf-8', overwrite=True) as writer:
 | 
	
		
			
				|  |  | -        writer.write(csv_data)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    filtered_df = group_df[(abs(group_df['segment_diff_rate']) >= 0.2) & (group_df['segment_label_cnt'] >= 1000)]
 | 
	
		
			
				|  |  | -    filtered_df = filtered_df[['score_segment', 'segment_diff_rate']]
 | 
	
		
			
				|  |  | -    # 每条曝光数据添加对应分数的diff
 | 
	
		
			
				|  |  | -    merged_df = pd.merge(sored_df, filtered_df, on="score_segment", how="left")
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    merged_df['segment_diff_rate'] = merged_df['segment_diff_rate'].fillna(0)
 | 
	
		
			
				|  |  | -    return merged_df, filtered_df
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -def read_and_calibration_predict(predict_path: str, step=100) -> [pd.DataFrame, pd.DataFrame, pd.DataFrame]:
 | 
	
		
			
				|  |  | -    """
 | 
	
		
			
				|  |  | -    读取评估结果,并进行校准
 | 
	
		
			
				|  |  | -    """
 | 
	
		
			
				|  |  | -    # 本地调试使用
 | 
	
		
			
				|  |  | -    # predicts = read_predict_from_local_txt(predict_path)
 | 
	
		
			
				|  |  | -    predicts = read_predict_from_hdfs(predict_path)
 | 
	
		
			
				|  |  | -    df = pd.DataFrame(predicts)
 | 
	
		
			
				|  |  | +def parse_predict_line(line: str) -> [bool, dict]:
 | 
	
		
			
				|  |  | +    sp = line.replace("\n", "").split("\t")
 | 
	
		
			
				|  |  | +    if len(sp) == 4:
 | 
	
		
			
				|  |  | +        label = int(sp[0])
 | 
	
		
			
				|  |  | +        cid = sp[3].split("_")[0]
 | 
	
		
			
				|  |  | +        score = float(sp[2].replace("[", "").replace("]", "").split(",")[1])
 | 
	
		
			
				|  |  | +        return True, {
 | 
	
		
			
				|  |  | +            "label": label,
 | 
	
		
			
				|  |  | +            "cid": cid,
 | 
	
		
			
				|  |  | +            "score": score
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +    return False, {}
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    # 模型分分段计算与真实ctcvr的dff_rate
 | 
	
		
			
				|  |  | -    predict_basename = os.path.basename(predict_path)
 | 
	
		
			
				|  |  | -    if predict_basename.endswith("/"):
 | 
	
		
			
				|  |  | -        predict_basename = predict_basename[:-1]
 | 
	
		
			
				|  |  | -    df, segment_df = segment_calc_diff_rate_by_score(df, segment_file_path=f"{SEGMENT_BASE_PATH}/{predict_basename}.txt", step=100)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    # 生成校准后的分数
 | 
	
		
			
				|  |  | -    df['score_2'] = df['score'] / (1 + df['segment_diff_rate'])
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    # 按CID统计真实ctcvr和校准前后的平均模型分
 | 
	
		
			
				|  |  | -    grouped_df = df.groupby("cid").agg(
 | 
	
		
			
				|  |  | -        view=('cid', 'size'),
 | 
	
		
			
				|  |  | -        conv=('label', 'sum'),
 | 
	
		
			
				|  |  | -        score_avg=('score', lambda x: round(x.mean(), 6)),
 | 
	
		
			
				|  |  | -        score_2_avg=('score_2', lambda x: round(x.mean(), 6)),
 | 
	
		
			
				|  |  | -    ).reset_index()
 | 
	
		
			
				|  |  | -    grouped_df['true_ctcvr'] = grouped_df['conv'] / grouped_df['view']
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    return df, grouped_df, segment_df
 | 
	
		
			
				|  |  | +def read_predict_file(file_path: str) -> pd.DataFrame:
 | 
	
		
			
				|  |  | +    result = []
 | 
	
		
			
				|  |  | +    if file_path.startswith("/dw"):
 | 
	
		
			
				|  |  | +        if not file_path.endswith("/"):
 | 
	
		
			
				|  |  | +            file_path += "/"
 | 
	
		
			
				|  |  | +        for file in client.list(file_path):
 | 
	
		
			
				|  |  | +            with client.read(file_path + file) as reader:
 | 
	
		
			
				|  |  | +                with gzip.GzipFile(fileobj=reader, mode="rb") as gz_file:
 | 
	
		
			
				|  |  | +                    for line in gz_file.read().decode("utf-8").split("\n"):
 | 
	
		
			
				|  |  | +                        b, d = parse_predict_line(line)
 | 
	
		
			
				|  |  | +                        if b: result.append(d)
 | 
	
		
			
				|  |  | +    else:
 | 
	
		
			
				|  |  | +        with open(file_path, "r") as f:
 | 
	
		
			
				|  |  | +            for line in f.readlines():
 | 
	
		
			
				|  |  | +                b, d = parse_predict_line(line)
 | 
	
		
			
				|  |  | +                if b: result.append(d)
 | 
	
		
			
				|  |  | +    return pd.DataFrame(result)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def calibration_file_save(df: pd.DataFrame, file_path: str):
 | 
	
		
			
				|  |  | +    if file_path.startswith("/dw"):
 | 
	
		
			
				|  |  | +        # 完整的分段文件保存
 | 
	
		
			
				|  |  | +        with client.write(file_path, encoding='utf-8', overwrite=True) as writer:
 | 
	
		
			
				|  |  | +            writer.write(df.to_csv(sep="\t", index=False))
 | 
	
		
			
				|  |  | +    else:
 | 
	
		
			
				|  |  | +        df.tocsv(file_path, sep="\t", index=False)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def predict_local_save_for_auc(old_df: pd.DataFrame, new_df: pd.DataFrame):
 | 
	
	
		
			
				|  | @@ -134,27 +59,96 @@ def predict_local_save_for_auc(old_df: pd.DataFrame, new_df: pd.DataFrame):
 | 
	
		
			
				|  |  |      """
 | 
	
		
			
				|  |  |      d = {"old": old_df, "new": new_df}
 | 
	
		
			
				|  |  |      for key in d:
 | 
	
		
			
				|  |  | -        df = d[key][['label', "score"]]
 | 
	
		
			
				|  |  | -        df.to_csv(f"{PREDICT_CACHE_PATH}/{key}_1.txt", sep="\t", index=False, header=False)
 | 
	
		
			
				|  |  | -        df = d[key][['label', "score_2"]]
 | 
	
		
			
				|  |  | -        df.to_csv(f"{PREDICT_CACHE_PATH}/{key}_2.txt", sep="\t", index=False, header=False)
 | 
	
		
			
				|  |  | +        df = d[key]
 | 
	
		
			
				|  |  | +        if 'score' in df.columns:
 | 
	
		
			
				|  |  | +            score_df = df[['label', "score"]]
 | 
	
		
			
				|  |  | +            score_df.to_csv(f"{PREDICT_CACHE_PATH}/{key}_1.txt", sep="\t", index=False, header=False)
 | 
	
		
			
				|  |  | +        if 'score_2' in df.columns:
 | 
	
		
			
				|  |  | +            score_2_df = d[key][['label', "score_2"]]
 | 
	
		
			
				|  |  | +            score_2_df.to_csv(f"{PREDICT_CACHE_PATH}/{key}_2.txt", sep="\t", index=False, header=False)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def save_full_calibration_file(df: pd.DataFrame, segment_file_path: str):
 | 
	
		
			
				|  |  | +    if segment_file_path.startswith("/dw"):
 | 
	
		
			
				|  |  | +        # 完整的分段文件保存
 | 
	
		
			
				|  |  | +        with client.write(segment_file_path, encoding='utf-8', overwrite=True) as writer:
 | 
	
		
			
				|  |  | +            writer.write(df.to_csv(sep="\t", index=False))
 | 
	
		
			
				|  |  | +    else:
 | 
	
		
			
				|  |  | +        df.to_csv(segment_file_path, sep="\t", index=False)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def get_predict_calibration_file(df: pd.DataFrame, predict_basename: str) -> [pd.DataFrame]:
 | 
	
		
			
				|  |  | +    """
 | 
	
		
			
				|  |  | +    计算模型分的diff_rate
 | 
	
		
			
				|  |  | +    """
 | 
	
		
			
				|  |  | +    agg_df = predict_df_agg(df)
 | 
	
		
			
				|  |  | +    agg_df['diff_rate'] = (agg_df['score_avg'] / agg_df['true_ctcvr'] - 1).mask(agg_df['true_ctcvr'] == 0, 0).round(6)
 | 
	
		
			
				|  |  | +    condition = 'view > 1000 and diff_rate >= 0.2'
 | 
	
		
			
				|  |  | +    save_full_calibration_file(agg_df, f"{SEGMENT_BASE_PATH}/{predict_basename}.txt")
 | 
	
		
			
				|  |  | +    calibration = agg_df[(agg_df['view'] > 1000) & ((agg_df['diff_rate'] >= 0.2) | (agg_df['diff_rate'] <= 0.2)) & agg_df['diff_rate'] != 0]
 | 
	
		
			
				|  |  | +    return calibration
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def get_predict_basename(predict_path) -> [str]:
 | 
	
		
			
				|  |  | +    """
 | 
	
		
			
				|  |  | +    获取文件路径的最后一部分,作为与模型关联的文件名
 | 
	
		
			
				|  |  | +    """
 | 
	
		
			
				|  |  | +    predict_basename = os.path.basename(predict_path)
 | 
	
		
			
				|  |  | +    if predict_basename.endswith("/"):
 | 
	
		
			
				|  |  | +        predict_basename = predict_basename[:-1]
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    return predict_basename
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def calc_calibration_score2(df: pd.DataFrame, calibration_df: pd.DataFrame) -> [pd.DataFrame]:
 | 
	
		
			
				|  |  | +    calibration_df = calibration_df[['cid', 'diff_rate']]
 | 
	
		
			
				|  |  | +    df = pd.merge(df, calibration_df, on='cid', how='left').fillna(0)
 | 
	
		
			
				|  |  | +    df['score_2'] = df['score'] / (1 + df['diff_rate'])
 | 
	
		
			
				|  |  | +    return df
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def predict_df_agg(df: pd.DataFrame) -> [pd.DataFrame]:
 | 
	
		
			
				|  |  | +    # 基础聚合操作
 | 
	
		
			
				|  |  | +    agg_operations = {
 | 
	
		
			
				|  |  | +        'view': ('cid', 'size'),
 | 
	
		
			
				|  |  | +        'conv': ('label', 'sum'),
 | 
	
		
			
				|  |  | +        'score_avg': ('score', lambda x: round(x.mean(), 6)),
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    # 如果存在 score_2 列,则增加相关聚合
 | 
	
		
			
				|  |  | +    if "score_2" in df.columns:
 | 
	
		
			
				|  |  | +        agg_operations['score_2_avg'] = ('score_2', lambda x: round(x.mean(), 6))
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    grouped_df = df.groupby("cid").agg(**agg_operations).reset_index()
 | 
	
		
			
				|  |  | +    grouped_df['true_ctcvr'] = grouped_df['conv'] / grouped_df['view']
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    return grouped_df
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def _main(old_predict_path: str, new_predict_path: str, calibration_file: str, analyse_file: str):
 | 
	
		
			
				|  |  | -    old_df, old_group_df, old_segment_df = read_and_calibration_predict(old_predict_path)
 | 
	
		
			
				|  |  | -    new_df, new_group_df, new_segment_df = read_and_calibration_predict(new_predict_path)
 | 
	
		
			
				|  |  | +    old_df = read_predict_file(old_predict_path)
 | 
	
		
			
				|  |  | +    new_df = read_predict_file(new_predict_path)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    old_calibration_df = get_predict_calibration_file(old_df, get_predict_basename(old_predict_path))
 | 
	
		
			
				|  |  | +    old_df = calc_calibration_score2(old_df, old_calibration_df)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    new_calibration_df = get_predict_calibration_file(new_df, get_predict_basename(new_predict_path))
 | 
	
		
			
				|  |  | +    new_df = calc_calibration_score2(new_df, new_calibration_df)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    # 本地保存label、score以及校准后的score,用于计算AUC等信息
 | 
	
		
			
				|  |  |      predict_local_save_for_auc(old_df, new_df)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    # 分段文件保存, 此处保留的最后使用的分段文件,不是所有的分段
 | 
	
		
			
				|  |  | -    new_segment_df.to_csv(calibration_file, sep='\t', index=False, header=False)
 | 
	
		
			
				|  |  | +    # 新模型校准文件保存本地,用于同步OSS
 | 
	
		
			
				|  |  | +    new_calibration_df[['cid', 'diff_rate']].to_csv(calibration_file, sep="\t", index=False, header=False)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    # 字段重命名,和列过滤
 | 
	
		
			
				|  |  | -    old_group_df.rename(columns={'score_avg': 'old_score_avg', 'score_2_avg': 'old_score_2_avg'}, inplace=True)
 | 
	
		
			
				|  |  | -    new_group_df.rename(columns={'score_avg': 'new_score_avg', 'score_2_avg': 'new_score_2_avg'}, inplace=True)
 | 
	
		
			
				|  |  | -    old_group_df = old_group_df[['cid', 'view', 'conv', 'true_ctcvr', 'old_score_avg', 'old_score_2_avg']]
 | 
	
		
			
				|  |  | -    new_group_df = new_group_df[['cid', 'new_score_avg', 'new_score_2_avg']]
 | 
	
		
			
				|  |  | +    old_agg_df = predict_df_agg(old_df)
 | 
	
		
			
				|  |  | +    new_agg_df = predict_df_agg(new_df)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    # 字段重命名,和列过滤
 | 
	
		
			
				|  |  | +    old_agg_df.rename(columns={'score_avg': 'old_score_avg', 'score_2_avg': 'old_score_2_avg'}, inplace=True)
 | 
	
		
			
				|  |  | +    new_agg_df.rename(columns={'score_avg': 'new_score_avg', 'score_2_avg': 'new_score_2_avg'}, inplace=True)
 | 
	
		
			
				|  |  | +    old_group_df = old_agg_df[['cid', 'view', 'conv', 'true_ctcvr', 'old_score_avg', 'old_score_2_avg']]
 | 
	
		
			
				|  |  | +    new_group_df = new_agg_df[['cid', 'new_score_avg', 'new_score_2_avg']]
 | 
	
		
			
				|  |  |      merged = pd.merge(old_group_df, new_group_df, on='cid', how='left')
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      # 计算与真实ctcvr的差异值
 | 
	
	
		
			
				|  | @@ -183,7 +177,7 @@ def _main(old_predict_path: str, new_predict_path: str, calibration_file: str, a
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  if __name__ == '__main__':
 | 
	
		
			
				|  |  | -    parser = argparse.ArgumentParser(description="model_predict_analyse.py")
 | 
	
		
			
				|  |  | +    parser = argparse.ArgumentParser(description="model_predict_analyse_20241101.py")
 | 
	
		
			
				|  |  |      parser.add_argument("-op", "--old_predict_path", required=True, help="老模型评估结果")
 | 
	
		
			
				|  |  |      parser.add_argument("-np", "--new_predict_path", required=True, help="新模型评估结果")
 | 
	
		
			
				|  |  |      parser.add_argument("-af", "--analyse_file", required=True, help="最后计算结果的保存路径")
 |