#!/usr/bin/env bash
# recall_regression_test.sh
#
# Recall-test API: regression comparison script for before/after a deployment.
#
# Usage:
#   ./recall_regression_test.sh run baseline          # before deploy: run every case, save to $RESULTS_DIR/baseline/
#   ./recall_regression_test.sh run verify            # after deploy: run the same cases, save to $RESULTS_DIR/verify/
#   ./recall_regression_test.sh diff baseline verify  # compare the two runs
#
# Optional environment variables:
#   BASE_URL     default https://api-internal.piaoquantv.com/videoVector
#   RESULTS_DIR  default <directory of this script>/recall_test_results
#
# Dependencies: curl, python3 (used for JSON formatting)
set -euo pipefail

# Service root that every request path is appended to; overridable via env.
BASE_URL="${BASE_URL:-https://api-internal.piaoquantv.com/videoVector}"
# Root of all result directories; defaults to recall_test_results/ next to
# this script (resolved to an absolute path), overridable via env.
RESULTS_DIR="${RESULTS_DIR:-$(cd "$(dirname "$0")" && pwd)/recall_test_results}"

# ===== Test inputs =====
# Real video ID (confirmed by the user)
VIDEO_ID_PRIMARY=64632804
# Fallback ID (taken from an actual prod matchByText response)
VIDEO_ID_SECONDARY=67688956
# Query strings for text recall
QUERY_TEXTS=(
  "夏季减肥小妙招"
  "宝宝辅食做法"
  "婆婆和儿媳"
)
# Config codes
CONFIG_CODES=(VIDEO_TOPIC VIDEO_INSPIRATION)
# Top-N (fixed, so result-size differences are never caused by differing parameters)
TOP_N=10
# ===== Helper functions =====

#######################################
# Write a timestamped progress message to stderr.
# Arguments: message words; they are joined via "$*".
# Outputs:   "[HH:MM:SS] <message>" on stderr; nothing on stdout.
#######################################
log() {
  # printf '%s' keeps the message literal (no echo option/backslash handling).
  printf '[%s] %s\n' "$(date +%H:%M:%S)" "$*" >&2
}
#######################################
# Pretty-print and normalise JSON so that two runs produce a stable diff:
# keys are sorted, non-ASCII characters are emitted unescaped and the indent
# is fixed at 2 spaces.
# Inputs:  a JSON document on stdin.
# Outputs: the normalised document on stdout; on failure
#          "JSON parse failed: <reason>" on stderr.
# Returns: 0 on success, non-zero when the input cannot be processed.
#######################################
prettify() {
  # Pin Python's stdio to UTF-8 so decoding the input and printing unescaped
  # non-ASCII text does not depend on the caller's locale.
  PYTHONIOENCODING=utf-8 python3 -c '
import sys, json
try:
    d = json.load(sys.stdin)
    print(json.dumps(d, indent=2, ensure_ascii=False, sort_keys=True))
except Exception as e:
    sys.stderr.write("JSON parse failed: %s\n" % e)
    sys.exit(1)
'
}
#######################################
# Issue a GET request and record the outcome as <out_dir>/<name>.json.
# Globals:   BASE_URL (read)
# Arguments: $1 - case name, used as the stem of the result file
#            $2 - request path (including query string) appended to BASE_URL
#            $3 - directory the result file is written to
# Outputs:   progress and warnings through log().
# Result file contents, depending on the outcome:
#   - the prettify output, when the body is accepted by prettify;
#   - the raw body, unchanged, when prettify rejects it;
#   - curl's stderr text, when curl itself exits non-zero.
# Returns:   status of the final mv/rm; a failed request is recorded in the
#            result file rather than reported through the exit status.
#######################################
do_get() {
  local name="$1"; shift
  local path="$1"; shift
  local out_dir="$1"; shift
  local raw="$out_dir/${name}.raw.txt"
  local pretty="$out_dir/${name}.json"

  log "GET $name -> $path"
  if curl -sS --max-time 30 "${BASE_URL}${path}" > "$raw" 2>"$raw.err"; then
    if prettify < "$raw" > "$pretty" 2>/dev/null; then
      rm -f "$raw" "$raw.err"
    else
      # The raw body is stored under the .json name (not left as .raw.txt) so
      # that the result still has the same file name as a successful run.
      log " WARN: $name 返回非 JSON,原始响应已保存为 ${name}.json"
      mv "$raw" "$pretty"
      rm -f "$raw.err"
    fi
  else
    log " ERROR: $name curl 失败"
    # Keep curl's error text as the result so the failure is visible later.
    mv "$raw.err" "$pretty"
    rm -f "$raw"
  fi
}
#######################################
# Issue a JSON POST request and record the outcome as <out_dir>/<name>.json.
# Globals:   BASE_URL (read)
# Arguments: $1 - case name, used as the stem of the result file
#            $2 - request path appended to BASE_URL
#            $3 - request body, sent as-is with Content-Type: application/json
#            $4 - directory the result file is written to
# Outputs:   progress and warnings through log().
# Result file contents, depending on the outcome:
#   - the prettify output, when the body is accepted by prettify;
#   - the raw body, unchanged, when prettify rejects it;
#   - curl's stderr text, when curl itself exits non-zero.
# Returns:   status of the final mv/rm; a failed request is recorded in the
#            result file rather than reported through the exit status.
#######################################
do_post() {
  local name="$1"; shift
  local path="$1"; shift
  local body="$1"; shift
  local out_dir="$1"; shift
  local raw="$out_dir/${name}.raw.txt"
  local pretty="$out_dir/${name}.json"

  log "POST $name -> $path body=$body"
  if curl -sS --max-time 60 -X POST \
    -H "Content-Type: application/json" \
    -d "$body" \
    "${BASE_URL}${path}" > "$raw" 2>"$raw.err"; then
    if prettify < "$raw" > "$pretty" 2>/dev/null; then
      rm -f "$raw" "$raw.err"
    else
      # The raw body is stored under the .json name (not left as .raw.txt) so
      # that the result still has the same file name as a successful run.
      log " WARN: $name 返回非 JSON,原始响应已保存为 ${name}.json"
      mv "$raw" "$pretty"
      rm -f "$raw.err"
    fi
  else
    log " ERROR: $name curl 失败"
    # Keep curl's error text as the result so the failure is visible later.
    mv "$raw.err" "$pretty"
    rm -f "$raw"
  fi
}
# ===== run mode =====

#######################################
# Run every regression case once and store one result file per case.
# Globals:   RESULTS_DIR, BASE_URL, VIDEO_ID_PRIMARY, VIDEO_ID_SECONDARY,
#            QUERY_TEXTS, CONFIG_CODES, TOP_N (all read)
# Arguments: $1 - label; results go to $RESULTS_DIR/<label>/ (created if absent)
# Outputs:   progress through log(); result files via do_get()/do_post().
#######################################
run_tests() {
  local label="$1"
  local out_dir="$RESULTS_DIR/$label"
  # Loop variables are local so they do not leak into the script's global scope.
  local qt cc qt_json entry
  local idx=0 file_count=0

  mkdir -p "$out_dir"
  log "==> 输出目录: $out_dir"
  log "==> BASE_URL: $BASE_URL"

  # --- videoDetail ---
  do_get "videoDetail__primary" "/recallTest/videoDetail?videoId=$VIDEO_ID_PRIMARY" "$out_dir"
  do_get "videoDetail__secondary" "/recallTest/videoDetail?videoId=$VIDEO_ID_SECONDARY" "$out_dir"
  # videoId=1 serves as the "missing video" case (assumed not to exist).
  do_get "videoDetail__missing" "/recallTest/videoDetail?videoId=1" "$out_dir"

  # --- aiUnderstanding ---
  do_get "aiUnderstanding__primary" "/recallTest/aiUnderstanding?videoId=$VIDEO_ID_PRIMARY" "$out_dir"

  # --- deconstructPoints ---
  do_get "deconstructPoints__primary" "/recallTest/deconstructPoints?videoId=$VIDEO_ID_PRIMARY" "$out_dir"
  do_get "deconstructPoints__secondary" "/recallTest/deconstructPoints?videoId=$VIDEO_ID_SECONDARY" "$out_dir"

  # --- matchByText: every query text x every config code ---
  for qt in "${QUERY_TEXTS[@]}"; do
    idx=$((idx+1))
    # Escape backslashes and double quotes so the query text stays a valid
    # JSON string when spliced into the request body.
    qt_json=${qt//\\/\\\\}
    qt_json=${qt_json//\"/\\\"}
    for cc in "${CONFIG_CODES[@]}"; do
      do_post \
        "matchByText__q${idx}__${cc}" \
        "/recallTest/matchByText" \
        "{\"queryText\":\"$qt_json\",\"configCode\":\"$cc\",\"topN\":$TOP_N}" \
        "$out_dir"
    done
    # Default configCode (field omitted)
    do_post \
      "matchByText__q${idx}__DEFAULT" \
      "/recallTest/matchByText" \
      "{\"queryText\":\"$qt_json\",\"topN\":$TOP_N}" \
      "$out_dir"
  done

  # --- matchByText boundary case: empty text ---
  do_post "matchByText__empty" "/recallTest/matchByText" \
    "{\"queryText\":\"\",\"configCode\":\"VIDEO_TOPIC\",\"topN\":$TOP_N}" "$out_dir"

  # --- matchByVideoId: both IDs x every config code ---
  for cc in "${CONFIG_CODES[@]}"; do
    do_post \
      "matchByVideoId__primary__${cc}" \
      "/recallTest/matchByVideoId" \
      "{\"videoId\":$VIDEO_ID_PRIMARY,\"configCode\":\"$cc\",\"topN\":$TOP_N}" \
      "$out_dir"
    do_post \
      "matchByVideoId__secondary__${cc}" \
      "/recallTest/matchByVideoId" \
      "{\"videoId\":$VIDEO_ID_SECONDARY,\"configCode\":\"$cc\",\"topN\":$TOP_N}" \
      "$out_dir"
  done
  # Default configCode (field omitted)
  do_post \
    "matchByVideoId__primary__DEFAULT" \
    "/recallTest/matchByVideoId" \
    "{\"videoId\":$VIDEO_ID_PRIMARY,\"topN\":$TOP_N}" \
    "$out_dir"

  # --- matchByVideoId boundary case: non-existent ID ---
  do_post "matchByVideoId__missing" "/recallTest/matchByVideoId" \
    "{\"videoId\":1,\"configCode\":\"VIDEO_TOPIC\",\"topN\":$TOP_N}" "$out_dir"

  # Count entries with a glob instead of parsing ls output; the -e test skips
  # the unexpanded pattern when the directory is empty.
  for entry in "$out_dir"/*; do
    if [[ -e "$entry" ]]; then
      file_count=$((file_count+1))
    fi
  done
  log "==> 完成,共生成 $file_count 个文件"
  log "==> 路径: $out_dir"
}
# ===== diff mode =====

#######################################
# Compare the *.json result files of two runs and print a summary.
# Globals:   RESULTS_DIR (read)
# Arguments: $1 - label of the first run  ($RESULTS_DIR/<label>/)
#            $2 - label of the second run ($RESULTS_DIR/<label>/)
# Outputs:   on stdout: the file-list mismatch (if any), a unified diff for
#            every file present in both runs whose content differs, and a
#            final statistics line. Files present in only one run appear in
#            the list mismatch but are not counted in the statistics.
# Returns:   0 even when differences are found; exits the shell with status 2
#            when either directory does not exist.
#######################################
diff_results() {
  local a="$1"
  local b="$2"
  local dir_a="$RESULTS_DIR/$a"
  local dir_b="$RESULTS_DIR/$b"
  local -a files_a=() files_b=()
  local list_a list_b path f
  local total=0 changed=0 same=0

  if [[ ! -d "$dir_a" ]]; then echo "目录不存在: $dir_a" >&2; exit 2; fi
  if [[ ! -d "$dir_b" ]]; then echo "目录不存在: $dir_b" >&2; exit 2; fi
  echo "==> 比对 $dir_a vs $dir_b"
  echo

  # Collect names with a glob instead of `ls | sort`: a directory without any
  # .json file must yield an empty list, not a failing pipeline that aborts
  # the script under `set -euo pipefail`. The -e test drops the unexpanded
  # pattern when nothing matches.
  for path in "$dir_a"/*.json; do
    if [[ -e "$path" ]]; then files_a+=("${path##*/}"); fi
  done
  for path in "$dir_b"/*.json; do
    if [[ -e "$path" ]]; then files_b+=("${path##*/}"); fi
  done

  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array without
  # tripping `set -u` on older bash versions.
  list_a=$(printf '%s\n' ${files_a[@]+"${files_a[@]}"})
  list_b=$(printf '%s\n' ${files_b[@]+"${files_b[@]}"})
  if [[ "$list_a" != "$list_b" ]]; then
    echo "!! 文件清单不一致:"
    # diff exits non-zero when the lists differ; that is the expected case here.
    diff <(echo "$list_a") <(echo "$list_b") || true
    echo
  fi

  for f in ${files_a[@]+"${files_a[@]}"}; do
    # Only files that exist in both runs are compared and counted.
    if [[ ! -f "$dir_b/$f" ]]; then continue; fi
    total=$((total+1))
    if diff -q "$dir_a/$f" "$dir_b/$f" >/dev/null 2>&1; then
      same=$((same+1))
    else
      changed=$((changed+1))
      echo "----- DIFF: $f -----"
      diff -u "$dir_a/$f" "$dir_b/$f" || true
      echo
    fi
  done
  echo "==> 统计: 共 $total 个文件, 一致 $same, 有差异 $changed"
}
# ===== Entry point =====
# Dispatch on the first argument:
#   run  [LABEL]             -> run_tests    (LABEL defaults to "baseline")
#   diff [LABEL_A [LABEL_B]] -> diff_results (defaults "baseline" / "verify")
# Anything else prints the usage text to stdout and exits with status 1.
cmd="${1:-}"
case "$cmd" in
  run)
    label="${2:-baseline}"
    run_tests "$label"
    ;;
  diff)
    a="${2:-baseline}"
    b="${3:-verify}"
    diff_results "$a" "$b"
    ;;
  *)
    # Unquoted delimiter: $0, $BASE_URL and $RESULTS_DIR are expanded so the
    # usage text shows the effective values.
    cat <<EOF
用法:
  $0 run [LABEL]             # 跑全部测试,默认 LABEL=baseline
  $0 diff LABEL_A LABEL_B    # 比对两次输出
示例:
  # 部署前
  $0 run baseline
  # (你部署新版本)
  $0 run verify
  # 比对差异
  $0 diff baseline verify
环境变量:
  BASE_URL=$BASE_URL
  RESULTS_DIR=$RESULTS_DIR
EOF
    exit 1
    ;;
esac