#!/usr/bin/env bash # recall_regression_test.sh # # 召回测试接口 — 部署前后回归对比脚本 # # 用法: # ./recall_regression_test.sh run baseline # 部署前: 跑全部用例,存到 results/baseline/ # ./recall_regression_test.sh run verify # 部署后: 跑同样用例,存到 results/verify/ # ./recall_regression_test.sh diff baseline verify # 比对两次输出 # # 可选环境变量: # BASE_URL 默认 https://api-internal.piaoquantv.com/videoVector # RESULTS_DIR 默认 ./recall_test_results # # 依赖: curl, python3 (用于 json 格式化) set -euo pipefail BASE_URL="${BASE_URL:-https://api-internal.piaoquantv.com/videoVector}" RESULTS_DIR="${RESULTS_DIR:-$(cd "$(dirname "$0")" && pwd)/recall_test_results}" # ===== 测试输入 ===== # 真实视频 ID (用户确认) VIDEO_ID_PRIMARY=64632804 # 候补 ID (从 prod matchByText 实际返回中取得) VIDEO_ID_SECONDARY=67688956 # 文本召回查询词 QUERY_TEXTS=( "夏季减肥小妙招" "宝宝辅食做法" "婆婆和儿媳" ) # 配置编码 CONFIG_CODES=(VIDEO_TOPIC VIDEO_INSPIRATION) # Top-N (固定,避免参数不同导致的 size 差异) TOP_N=10 # ===== 工具函数 ===== log() { echo "[$(date +%H:%M:%S)] $*" >&2; } # 美化 + 标准化 JSON (排序 key、UTF-8 不转义、固定缩进) → 稳定 diff prettify() { python3 -c ' import sys, json try: d = json.load(sys.stdin) print(json.dumps(d, indent=2, ensure_ascii=False, sort_keys=True)) except Exception as e: sys.stderr.write("JSON parse failed: %s\n" % e) sys.exit(1) ' } # 调 GET do_get() { local name="$1"; shift local path="$1"; shift local out_dir="$1"; shift local raw="$out_dir/${name}.raw.txt" local pretty="$out_dir/${name}.json" log "GET $name -> $path" if curl -sS --max-time 30 "${BASE_URL}${path}" > "$raw" 2>"$raw.err"; then if prettify < "$raw" > "$pretty" 2>/dev/null; then rm -f "$raw" "$raw.err" else log " WARN: $name 返回非 JSON,保留 .raw.txt" mv "$raw" "$pretty" rm -f "$raw.err" fi else log " ERROR: $name curl 失败" mv "$raw.err" "$pretty" rm -f "$raw" fi } # 调 POST do_post() { local name="$1"; shift local path="$1"; shift local body="$1"; shift local out_dir="$1"; shift local raw="$out_dir/${name}.raw.txt" local pretty="$out_dir/${name}.json" log "POST $name -> $path body=$body" if curl -sS --max-time 60 -X POST \ -H "Content-Type: application/json" \ -d "$body" \ "${BASE_URL}${path}" > "$raw" 2>"$raw.err"; then if prettify < "$raw" > "$pretty" 2>/dev/null; then rm -f "$raw" "$raw.err" else log " WARN: $name 返回非 JSON,保留 .raw.txt" mv "$raw" "$pretty" rm -f "$raw.err" fi else log " ERROR: $name curl 失败" mv "$raw.err" "$pretty" rm -f "$raw" fi } # ===== run 模式 ===== run_tests() { local label="$1" local out_dir="$RESULTS_DIR/$label" mkdir -p "$out_dir" log "==> 输出目录: $out_dir" log "==> BASE_URL: $BASE_URL" # --- videoDetail --- do_get "videoDetail__primary" "/recallTest/videoDetail?videoId=$VIDEO_ID_PRIMARY" "$out_dir" do_get "videoDetail__secondary" "/recallTest/videoDetail?videoId=$VIDEO_ID_SECONDARY" "$out_dir" do_get "videoDetail__missing" "/recallTest/videoDetail?videoId=1" "$out_dir" # --- aiUnderstanding --- do_get "aiUnderstanding__primary" "/recallTest/aiUnderstanding?videoId=$VIDEO_ID_PRIMARY" "$out_dir" # --- deconstructPoints --- do_get "deconstructPoints__primary" "/recallTest/deconstructPoints?videoId=$VIDEO_ID_PRIMARY" "$out_dir" do_get "deconstructPoints__secondary" "/recallTest/deconstructPoints?videoId=$VIDEO_ID_SECONDARY" "$out_dir" # --- matchByText × (queryText × configCode) --- local idx=0 for qt in "${QUERY_TEXTS[@]}"; do idx=$((idx+1)) for cc in "${CONFIG_CODES[@]}"; do do_post \ "matchByText__q${idx}__${cc}" \ "/recallTest/matchByText" \ "{\"queryText\":\"$qt\",\"configCode\":\"$cc\",\"topN\":$TOP_N}" \ "$out_dir" done # 默认 configCode (不传) do_post \ "matchByText__q${idx}__DEFAULT" \ "/recallTest/matchByText" \ "{\"queryText\":\"$qt\",\"topN\":$TOP_N}" \ "$out_dir" done # --- matchByText 边界: 空文本 --- do_post "matchByText__empty" "/recallTest/matchByText" \ "{\"queryText\":\"\",\"configCode\":\"VIDEO_TOPIC\",\"topN\":$TOP_N}" "$out_dir" # --- matchByVideoId × configCode --- for cc in "${CONFIG_CODES[@]}"; do do_post \ "matchByVideoId__primary__${cc}" \ "/recallTest/matchByVideoId" \ "{\"videoId\":$VIDEO_ID_PRIMARY,\"configCode\":\"$cc\",\"topN\":$TOP_N}" \ "$out_dir" do_post \ "matchByVideoId__secondary__${cc}" \ "/recallTest/matchByVideoId" \ "{\"videoId\":$VIDEO_ID_SECONDARY,\"configCode\":\"$cc\",\"topN\":$TOP_N}" \ "$out_dir" done # 默认 configCode do_post \ "matchByVideoId__primary__DEFAULT" \ "/recallTest/matchByVideoId" \ "{\"videoId\":$VIDEO_ID_PRIMARY,\"topN\":$TOP_N}" \ "$out_dir" # --- matchByVideoId 边界: 不存在的 ID --- do_post "matchByVideoId__missing" "/recallTest/matchByVideoId" \ "{\"videoId\":1,\"configCode\":\"VIDEO_TOPIC\",\"topN\":$TOP_N}" "$out_dir" log "==> 完成,共生成 $(ls "$out_dir" | wc -l | tr -d ' ') 个文件" log "==> 路径: $out_dir" } # ===== diff 模式 ===== diff_results() { local a="$1" local b="$2" local dir_a="$RESULTS_DIR/$a" local dir_b="$RESULTS_DIR/$b" if [[ ! -d "$dir_a" ]]; then echo "目录不存在: $dir_a" >&2; exit 2; fi if [[ ! -d "$dir_b" ]]; then echo "目录不存在: $dir_b" >&2; exit 2; fi echo "==> 比对 $dir_a vs $dir_b" echo local files_a files_b files_a=$(cd "$dir_a" && ls *.json 2>/dev/null | sort) files_b=$(cd "$dir_b" && ls *.json 2>/dev/null | sort) if [[ "$files_a" != "$files_b" ]]; then echo "!! 文件清单不一致:" diff <(echo "$files_a") <(echo "$files_b") || true echo fi local total=0 changed=0 same=0 for f in $files_a; do if [[ ! -f "$dir_b/$f" ]]; then continue; fi total=$((total+1)) if diff -q "$dir_a/$f" "$dir_b/$f" >/dev/null 2>&1; then same=$((same+1)) else changed=$((changed+1)) echo "----- DIFF: $f -----" diff -u "$dir_a/$f" "$dir_b/$f" || true echo fi done echo "==> 统计: 共 $total 个文件, 一致 $same, 有差异 $changed" } # ===== 入口 ===== cmd="${1:-}" case "$cmd" in run) label="${2:-baseline}" run_tests "$label" ;; diff) a="${2:-baseline}" b="${3:-verify}" diff_results "$a" "$b" ;; *) cat <