Pārlūkot izejas kodu

test: 召回测试接口部署前后回归对比脚本

提供 baseline/verify 双跑 + diff 模式,用于校验 master 合并(matchTopNVideo
强类型化、DeconstructContent 重命名)对 HTTP 响应是否有非预期回归。

- 22 个用例覆盖 videoDetail/aiUnderstanding/deconstructPoints/matchByText/matchByVideoId
- JSON 排序+UTF8 不转义,保证 diff 不被字段顺序干扰
- 快照目录 .gitignore 不入库

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
刘立冬 1 dienu atpakaļ
vecāks
revīzija
b8bb794611
2 mainīti faili ar 256 papildinājumiem un 0 dzēšanām
  1. 3 0
      .gitignore
  2. 253 0
      script/recall_regression_test.sh

+ 3 - 0
.gitignore

@@ -37,3 +37,6 @@ build/
 *.log
 ### Runtime caches ###
 config-cache/
+
+### 回归测试生成的快照 ###
+script/recall_test_results/

+ 253 - 0
script/recall_regression_test.sh

@@ -0,0 +1,253 @@
+#!/usr/bin/env bash
+# recall_regression_test.sh
+#
+# 召回测试接口 — 部署前后回归对比脚本
+#
+# 用法:
+#   ./recall_regression_test.sh run baseline       # 部署前: 跑全部用例,存到 $RESULTS_DIR/baseline/
+#   ./recall_regression_test.sh run verify         # 部署后: 跑同样用例,存到 $RESULTS_DIR/verify/
+#   ./recall_regression_test.sh diff baseline verify   # 比对两次输出
+#
+# 可选环境变量:
+#   BASE_URL      默认 https://api-internal.piaoquantv.com/videoVector
+#   RESULTS_DIR   默认 <脚本所在目录>/recall_test_results
+#
+# 依赖: curl, python3 (用于 json 格式化)
+
+set -euo pipefail
+
+BASE_URL="${BASE_URL:-https://api-internal.piaoquantv.com/videoVector}"
+RESULTS_DIR="${RESULTS_DIR:-$(cd "$(dirname "$0")" && pwd)/recall_test_results}"
+
+# ===== Test inputs =====
+# Real video ID (confirmed by the user).
+VIDEO_ID_PRIMARY='64632804'
+# Fallback ID (taken from an actual prod matchByText response).
+VIDEO_ID_SECONDARY='67688956'
+
+# Query strings used for text-based recall.
+QUERY_TEXTS=("夏季减肥小妙招" "宝宝辅食做法" "婆婆和儿媳")
+
+# Config codes.
+CONFIG_CODES=(
+  VIDEO_TOPIC
+  VIDEO_INSPIRATION
+)
+
+# Top-N is pinned so that result-size differences cannot come from the
+# request parameter.
+TOP_N='10'
+
+# ===== 工具函数 =====
+log() { echo "[$(date +%H:%M:%S)] $*" >&2; }
+
+# 美化 + 标准化 JSON (排序 key、UTF-8 不转义、固定缩进) → 稳定 diff
+prettify() {
+  python3 -c '
+import sys, json
+try:
+    d = json.load(sys.stdin)
+    print(json.dumps(d, indent=2, ensure_ascii=False, sort_keys=True))
+except Exception as e:
+    sys.stderr.write("JSON parse failed: %s\n" % e)
+    sys.exit(1)
+'
+}
+
+# 调 GET
+do_get() {
+  local name="$1"; shift
+  local path="$1"; shift
+  local out_dir="$1"; shift
+  local raw="$out_dir/${name}.raw.txt"
+  local pretty="$out_dir/${name}.json"
+
+  log "GET  $name -> $path"
+  if curl -sS --max-time 30 "${BASE_URL}${path}" > "$raw" 2>"$raw.err"; then
+    if prettify < "$raw" > "$pretty" 2>/dev/null; then
+      rm -f "$raw" "$raw.err"
+    else
+      log "  WARN: $name 返回非 JSON,保留 .raw.txt"
+      mv "$raw" "$pretty"
+      rm -f "$raw.err"
+    fi
+  else
+    log "  ERROR: $name curl 失败"
+    mv "$raw.err" "$pretty"
+    rm -f "$raw"
+  fi
+}
+
+# 调 POST
+do_post() {
+  local name="$1"; shift
+  local path="$1"; shift
+  local body="$1"; shift
+  local out_dir="$1"; shift
+  local raw="$out_dir/${name}.raw.txt"
+  local pretty="$out_dir/${name}.json"
+
+  log "POST $name -> $path  body=$body"
+  if curl -sS --max-time 60 -X POST \
+       -H "Content-Type: application/json" \
+       -d "$body" \
+       "${BASE_URL}${path}" > "$raw" 2>"$raw.err"; then
+    if prettify < "$raw" > "$pretty" 2>/dev/null; then
+      rm -f "$raw" "$raw.err"
+    else
+      log "  WARN: $name 返回非 JSON,保留 .raw.txt"
+      mv "$raw" "$pretty"
+      rm -f "$raw.err"
+    fi
+  else
+    log "  ERROR: $name curl 失败"
+    mv "$raw.err" "$pretty"
+    rm -f "$raw"
+  fi
+}
+
+# ===== run 模式 =====
+run_tests() {
+  local label="$1"
+  local out_dir="$RESULTS_DIR/$label"
+  mkdir -p "$out_dir"
+
+  log "==> 输出目录: $out_dir"
+  log "==> BASE_URL: $BASE_URL"
+
+  # --- videoDetail ---
+  do_get "videoDetail__primary"   "/recallTest/videoDetail?videoId=$VIDEO_ID_PRIMARY"   "$out_dir"
+  do_get "videoDetail__secondary" "/recallTest/videoDetail?videoId=$VIDEO_ID_SECONDARY" "$out_dir"
+  do_get "videoDetail__missing"   "/recallTest/videoDetail?videoId=1"                   "$out_dir"
+
+  # --- aiUnderstanding ---
+  do_get "aiUnderstanding__primary" "/recallTest/aiUnderstanding?videoId=$VIDEO_ID_PRIMARY" "$out_dir"
+
+  # --- deconstructPoints ---
+  do_get "deconstructPoints__primary"   "/recallTest/deconstructPoints?videoId=$VIDEO_ID_PRIMARY"   "$out_dir"
+  do_get "deconstructPoints__secondary" "/recallTest/deconstructPoints?videoId=$VIDEO_ID_SECONDARY" "$out_dir"
+
+  # --- matchByText × (queryText × configCode) ---
+  local idx=0
+  for qt in "${QUERY_TEXTS[@]}"; do
+    idx=$((idx+1))
+    for cc in "${CONFIG_CODES[@]}"; do
+      do_post \
+        "matchByText__q${idx}__${cc}" \
+        "/recallTest/matchByText" \
+        "{\"queryText\":\"$qt\",\"configCode\":\"$cc\",\"topN\":$TOP_N}" \
+        "$out_dir"
+    done
+    # 默认 configCode (不传)
+    do_post \
+      "matchByText__q${idx}__DEFAULT" \
+      "/recallTest/matchByText" \
+      "{\"queryText\":\"$qt\",\"topN\":$TOP_N}" \
+      "$out_dir"
+  done
+
+  # --- matchByText 边界: 空文本 ---
+  do_post "matchByText__empty" "/recallTest/matchByText" \
+    "{\"queryText\":\"\",\"configCode\":\"VIDEO_TOPIC\",\"topN\":$TOP_N}" "$out_dir"
+
+  # --- matchByVideoId × configCode ---
+  for cc in "${CONFIG_CODES[@]}"; do
+    do_post \
+      "matchByVideoId__primary__${cc}" \
+      "/recallTest/matchByVideoId" \
+      "{\"videoId\":$VIDEO_ID_PRIMARY,\"configCode\":\"$cc\",\"topN\":$TOP_N}" \
+      "$out_dir"
+    do_post \
+      "matchByVideoId__secondary__${cc}" \
+      "/recallTest/matchByVideoId" \
+      "{\"videoId\":$VIDEO_ID_SECONDARY,\"configCode\":\"$cc\",\"topN\":$TOP_N}" \
+      "$out_dir"
+  done
+  # 默认 configCode
+  do_post \
+    "matchByVideoId__primary__DEFAULT" \
+    "/recallTest/matchByVideoId" \
+    "{\"videoId\":$VIDEO_ID_PRIMARY,\"topN\":$TOP_N}" \
+    "$out_dir"
+
+  # --- matchByVideoId 边界: 不存在的 ID ---
+  do_post "matchByVideoId__missing" "/recallTest/matchByVideoId" \
+    "{\"videoId\":1,\"configCode\":\"VIDEO_TOPIC\",\"topN\":$TOP_N}" "$out_dir"
+
+  log "==> 完成,共生成 $(ls "$out_dir" | wc -l | tr -d ' ') 个文件"
+  log "==> 路径: $out_dir"
+}
+
+# ===== diff 模式 =====
+diff_results() {
+  local a="$1"
+  local b="$2"
+  local dir_a="$RESULTS_DIR/$a"
+  local dir_b="$RESULTS_DIR/$b"
+
+  if [[ ! -d "$dir_a" ]]; then echo "目录不存在: $dir_a" >&2; exit 2; fi
+  if [[ ! -d "$dir_b" ]]; then echo "目录不存在: $dir_b" >&2; exit 2; fi
+
+  echo "==> 比对 $dir_a   vs   $dir_b"
+  echo
+
+  local files_a files_b
+  files_a=$(cd "$dir_a" && ls *.json 2>/dev/null | sort)
+  files_b=$(cd "$dir_b" && ls *.json 2>/dev/null | sort)
+
+  if [[ "$files_a" != "$files_b" ]]; then
+    echo "!! 文件清单不一致:"
+    diff <(echo "$files_a") <(echo "$files_b") || true
+    echo
+  fi
+
+  local total=0 changed=0 same=0
+  for f in $files_a; do
+    if [[ ! -f "$dir_b/$f" ]]; then continue; fi
+    total=$((total+1))
+    if diff -q "$dir_a/$f" "$dir_b/$f" >/dev/null 2>&1; then
+      same=$((same+1))
+    else
+      changed=$((changed+1))
+      echo "----- DIFF: $f -----"
+      diff -u "$dir_a/$f" "$dir_b/$f" || true
+      echo
+    fi
+  done
+
+  echo "==> 统计: 共 $total 个文件, 一致 $same, 有差异 $changed"
+}
+
+# ===== 入口 =====
+cmd="${1:-}"
+case "$cmd" in
+  run)
+    label="${2:-baseline}"
+    run_tests "$label"
+    ;;
+  diff)
+    a="${2:-baseline}"
+    b="${3:-verify}"
+    diff_results "$a" "$b"
+    ;;
+  *)
+    cat <<EOF
+用法:
+  $0 run [LABEL]              # 跑全部测试,默认 LABEL=baseline
+  $0 diff LABEL_A LABEL_B     # 比对两次输出
+
+示例:
+  # 部署前
+  $0 run baseline
+  # (你部署新版本)
+  $0 run verify
+  # 比对差异
+  $0 diff baseline verify
+
+环境变量:
+  BASE_URL=$BASE_URL
+  RESULTS_DIR=$RESULTS_DIR
+EOF
+    exit 1
+    ;;
+esac