1 dienu atpakaļ · b8bb794611
--- a/.gitignore
+++ b/.gitignore
@@ -37,3 +37,6 @@ build/
 
				 *.log
			
 
				 ### Runtime caches ###
			
 
				 config-cache/
			
 
				+
			
 
				+### 回归测试生成的快照 ###
			
 
				+script/recall_test_results/
			
--- a/script/recall_regression_test.sh
+++ b/script/recall_regression_test.sh
@@ -0,0 +1,253 @@
 
				#!/usr/bin/env bash
				# recall_regression_test.sh
				#
				# Recall-test API: regression comparison script for before/after a deployment.
				#
				# Usage:
				#   ./recall_regression_test.sh run baseline          # before deploying: run every case, save under $RESULTS_DIR/baseline/
				#   ./recall_regression_test.sh run verify            # after deploying: run the same cases, save under $RESULTS_DIR/verify/
				#   ./recall_regression_test.sh diff baseline verify  # compare the two runs
				#
				# Optional environment variables:
				#   BASE_URL      defaults to https://api-internal.piaoquantv.com/videoVector
				#   RESULTS_DIR   defaults to <directory containing this script>/recall_test_results
				#
				# Dependencies: curl, python3 (used to format JSON)

				set -e -u -o pipefail

				# Both settings keep a caller-supplied value and only fall back when unset or empty.
				BASE_URL="${BASE_URL:-https://api-internal.piaoquantv.com/videoVector}"
				RESULTS_DIR="${RESULTS_DIR:-$(cd "$(dirname "$0")" && pwd)/recall_test_results}"

				# ===== Test inputs =====

				# Real video ID (confirmed by the user).
				VIDEO_ID_PRIMARY=64632804

				# Fallback ID (taken from an actual prod matchByText response).
				VIDEO_ID_SECONDARY=67688956

				# Query strings for text recall.
				QUERY_TEXTS=("夏季减肥小妙招" "宝宝辅食做法" "婆婆和儿媳")

				# Config codes.
				CONFIG_CODES=(
				  VIDEO_TOPIC
				  VIDEO_INSPIRATION
				)

				# Top-N is fixed so that differing parameters cannot cause result-size differences.
				TOP_N=10
			
 
				+
			
 
				+# ===== 工具函数 =====
			
 
				# Write a "[HH:MM:SS] message" line to stderr.
				# Arguments: every argument is joined into a single message.
				log() {
				  printf '[%s] %s\n' "$(date +%H:%M:%S)" "$*" >&2
				}
			
 
				+
			
 
				# Normalize JSON read from stdin and write it to stdout so two runs diff
				# cleanly: keys sorted, non-ASCII characters left unescaped, 2-space indent.
				# Input and output go through the binary streams as UTF-8 bytes, so the
				# result does not depend on the caller's locale or PYTHONIOENCODING.
				# Returns: 0 on success; 1, with a message on stderr, when stdin cannot be
				#          parsed as JSON.
				prettify() {
				  # The Python source must start at column 0 inside the quoted string.
				  python3 -c '
import sys, json
try:
    d = json.load(sys.stdin.buffer)
    text = json.dumps(d, indent=2, ensure_ascii=False, sort_keys=True)
    sys.stdout.buffer.write((text + "\n").encode("utf-8"))
except Exception as e:
    sys.stderr.write("JSON parse failed: %s\n" % e)
    sys.exit(1)
'
				}
			
 
				+
			
 
				# Shared implementation behind do_get / do_post: run curl and store the result.
				# Arguments: $1   - case name (basename of the output file)
				#            $2   - output directory (expected to exist already)
				#            $3.. - arguments handed to curl after "-sS"
				# Outputs:   leaves one file, <out_dir>/<name>.json, containing
				#              - the prettified body when the response parses as JSON,
				#              - the raw body when it does not,
				#              - curl's stderr when curl itself failed.
				#            The intermediate .raw.txt / .raw.txt.err files are removed.
				# Returns:   0 in all three cases (as long as the mv/rm steps succeed), so
				#            a single failing case does not abort the run under `set -e`.
				_recall_fetch() {
				  local name="$1" out_dir="$2"
				  shift 2
				  local raw="$out_dir/${name}.raw.txt"
				  local pretty="$out_dir/${name}.json"

				  if curl -sS "$@" >"$raw" 2>"$raw.err"; then
				    # prettify's own diagnostics are dropped on purpose; the WARN covers it.
				    if prettify <"$raw" >"$pretty" 2>/dev/null; then
				      rm -f "$raw" "$raw.err"
				    else
				      # The raw body is renamed to .json so the diff mode still sees it.
				      log "  WARN: $name 返回非 JSON,原始响应已保存为 ${name}.json"
				      mv "$raw" "$pretty"
				      rm -f "$raw.err"
				    fi
				  else
				    log "  ERROR: $name curl 失败"
				    mv "$raw.err" "$pretty"
				    rm -f "$raw"
				  fi
				}

				# Issue a GET request (30 s timeout) and store the response.
				# Globals:   BASE_URL (read)
				# Arguments: $1 - case name, $2 - URL path incl. query string, $3 - output dir
				do_get() {
				  local name="$1" path="$2" out_dir="$3"

				  log "GET  $name -> $path"
				  _recall_fetch "$name" "$out_dir" --max-time 30 "${BASE_URL}${path}"
				}

				# Issue a JSON POST request (60 s timeout) and store the response.
				# Globals:   BASE_URL (read)
				# Arguments: $1 - case name, $2 - URL path, $3 - JSON request body,
				#            $4 - output dir
				do_post() {
				  local name="$1" path="$2" body="$3" out_dir="$4"

				  log "POST $name -> $path  body=$body"
				  _recall_fetch "$name" "$out_dir" --max-time 60 -X POST \
				    -H "Content-Type: application/json" \
				    -d "$body" \
				    "${BASE_URL}${path}"
				}
			
 
				+
			
 
				+# ===== run 模式 =====
			
 
				# Escape text for embedding inside a double-quoted JSON string literal.
				# Handles backslash, double quote, newline, carriage return and tab; other
				# control characters are passed through unchanged.
				# Arguments: $1 - raw text
				# Outputs:   escaped text on stdout (no trailing newline)
				_recall_json_escape() {
				  local s="$1"
				  s=${s//\\/\\\\} # backslash first, so the escapes added below are not doubled
				  s=${s//\"/\\\"}
				  s=${s//$'\n'/\\n}
				  s=${s//$'\r'/\\r}
				  s=${s//$'\t'/\\t}
				  printf '%s' "$s"
				}

				# Run every test case and store one result file per case.
				# Globals:   RESULTS_DIR, BASE_URL, VIDEO_ID_PRIMARY, VIDEO_ID_SECONDARY,
				#            QUERY_TEXTS, CONFIG_CODES, TOP_N (all read)
				# Arguments: $1 - label; results go to $RESULTS_DIR/<label>/ (created if missing)
				# Outputs:   progress messages via log; files written by do_get / do_post
				run_tests() {
				  local label="$1"
				  local out_dir="$RESULTS_DIR/$label"
				  # Loop variables are declared local so they do not leak into the caller.
				  local idx=0 qt qt_json cc entry file_count=0
				  mkdir -p "$out_dir"

				  log "==> 输出目录: $out_dir"
				  log "==> BASE_URL: $BASE_URL"

				  # --- videoDetail ---
				  do_get "videoDetail__primary"   "/recallTest/videoDetail?videoId=$VIDEO_ID_PRIMARY"   "$out_dir"
				  do_get "videoDetail__secondary" "/recallTest/videoDetail?videoId=$VIDEO_ID_SECONDARY" "$out_dir"
				  do_get "videoDetail__missing"   "/recallTest/videoDetail?videoId=1"                   "$out_dir"

				  # --- aiUnderstanding ---
				  do_get "aiUnderstanding__primary" "/recallTest/aiUnderstanding?videoId=$VIDEO_ID_PRIMARY" "$out_dir"

				  # --- deconstructPoints ---
				  do_get "deconstructPoints__primary"   "/recallTest/deconstructPoints?videoId=$VIDEO_ID_PRIMARY"   "$out_dir"
				  do_get "deconstructPoints__secondary" "/recallTest/deconstructPoints?videoId=$VIDEO_ID_SECONDARY" "$out_dir"

				  # --- matchByText: every query text x every config code ---
				  for qt in "${QUERY_TEXTS[@]}"; do
				    idx=$((idx + 1))
				    # Escaped once per query so quotes/backslashes cannot break the JSON body.
				    qt_json=$(_recall_json_escape "$qt")
				    for cc in "${CONFIG_CODES[@]}"; do
				      do_post \
				        "matchByText__q${idx}__${cc}" \
				        "/recallTest/matchByText" \
				        "{\"queryText\":\"$qt_json\",\"configCode\":\"$cc\",\"topN\":$TOP_N}" \
				        "$out_dir"
				    done
				    # Default configCode (field omitted from the request).
				    do_post \
				      "matchByText__q${idx}__DEFAULT" \
				      "/recallTest/matchByText" \
				      "{\"queryText\":\"$qt_json\",\"topN\":$TOP_N}" \
				      "$out_dir"
				  done

				  # --- matchByText edge case: empty text ---
				  do_post "matchByText__empty" "/recallTest/matchByText" \
				    "{\"queryText\":\"\",\"configCode\":\"VIDEO_TOPIC\",\"topN\":$TOP_N}" "$out_dir"

				  # --- matchByVideoId: both video IDs x every config code ---
				  for cc in "${CONFIG_CODES[@]}"; do
				    do_post \
				      "matchByVideoId__primary__${cc}" \
				      "/recallTest/matchByVideoId" \
				      "{\"videoId\":$VIDEO_ID_PRIMARY,\"configCode\":\"$cc\",\"topN\":$TOP_N}" \
				      "$out_dir"
				    do_post \
				      "matchByVideoId__secondary__${cc}" \
				      "/recallTest/matchByVideoId" \
				      "{\"videoId\":$VIDEO_ID_SECONDARY,\"configCode\":\"$cc\",\"topN\":$TOP_N}" \
				      "$out_dir"
				  done
				  # Default configCode.
				  do_post \
				    "matchByVideoId__primary__DEFAULT" \
				    "/recallTest/matchByVideoId" \
				    "{\"videoId\":$VIDEO_ID_PRIMARY,\"topN\":$TOP_N}" \
				    "$out_dir"

				  # --- matchByVideoId edge case: non-existent ID ---
				  do_post "matchByVideoId__missing" "/recallTest/matchByVideoId" \
				    "{\"videoId\":1,\"configCode\":\"VIDEO_TOPIC\",\"topN\":$TOP_N}" "$out_dir"

				  # Count directory entries with a glob instead of parsing `ls` output.
				  # An unmatched glob stays literal, hence the existence check.
				  for entry in "$out_dir"/*; do
				    if [[ -e "$entry" ]]; then
				      file_count=$((file_count + 1))
				    fi
				  done

				  log "==> 完成,共生成 $file_count 个文件"
				  log "==> 路径: $out_dir"
				}
			
 
				+
			
 
				+# ===== diff 模式 =====
			
 
				# Print the sorted basenames of the *.json files directly inside a directory,
				# one per line. Prints nothing (and still succeeds) when there are none.
				# Arguments: $1 - directory
				_recall_list_json() {
				  local dir="$1" path
				  for path in "$dir"/*.json; do
				    # An unmatched glob stays literal; skip it so an empty dir yields nothing.
				    [[ -e "$path" ]] || continue
				    printf '%s\n' "${path##*/}"
				  done | sort
				}

				# Compare the result files of two runs and print a report to stdout.
				# Globals:   RESULTS_DIR (read)
				# Arguments: $1 - label of the first run, $2 - label of the second run
				# Outputs:   a manifest diff when the two file lists differ, a unified diff
				#            for each file present in both runs whose content differs, and a
				#            final summary line. Files found in only one run are not counted.
				# Exits:     2 (message on stderr) when either result directory is missing.
				diff_results() {
				  local a="$1"
				  local b="$2"
				  local dir_a="$RESULTS_DIR/$a"
				  local dir_b="$RESULTS_DIR/$b"

				  if [[ ! -d "$dir_a" ]]; then echo "目录不存在: $dir_a" >&2; exit 2; fi
				  if [[ ! -d "$dir_b" ]]; then echo "目录不存在: $dir_b" >&2; exit 2; fi

				  echo "==> 比对 $dir_a   vs   $dir_b"
				  echo

				  # Listing via a glob cannot fail on an empty directory, so `set -e` with
				  # pipefail no longer aborts the script silently in that case.
				  local files_a files_b
				  files_a=$(_recall_list_json "$dir_a")
				  files_b=$(_recall_list_json "$dir_b")

				  if [[ "$files_a" != "$files_b" ]]; then
				    echo "!! 文件清单不一致:"
				    # diff exits 1 when the inputs differ; that is expected here.
				    diff <(echo "$files_a") <(echo "$files_b") || true
				    echo
				  fi

				  local total=0 changed=0 same=0 f
				  # Read line by line so names containing spaces stay intact.
				  while IFS= read -r f; do
				    # Skips the blank line of an empty list and files missing from the second run.
				    [[ -n "$f" && -f "$dir_b/$f" ]] || continue
				    total=$((total + 1))
				    if diff -q "$dir_a/$f" "$dir_b/$f" >/dev/null 2>&1; then
				      same=$((same + 1))
				    else
				      changed=$((changed + 1))
				      echo "----- DIFF: $f -----"
				      diff -u "$dir_a/$f" "$dir_b/$f" || true
				      echo
				    fi
				  done <<<"$files_a"

				  echo "==> 统计: 共 $total 个文件, 一致 $same, 有差异 $changed"
				}
			
 
				+
			
 
				+# ===== 入口 =====
			
 
				# Entry point: dispatch on the first command-line argument.
				#   run  [LABEL]            -> run_tests    (LABEL defaults to "baseline")
				#   diff [LABEL_A LABEL_B]  -> diff_results (defaults "baseline" and "verify")
				#   anything else           -> print usage to stdout and exit 1
				cmd="${1:-}"
				if [[ "$cmd" == "run" ]]; then
				  label="${2:-baseline}"
				  run_tests "$label"
				elif [[ "$cmd" == "diff" ]]; then
				  a="${2:-baseline}"
				  b="${3:-verify}"
				  diff_results "$a" "$b"
				else
				  # One argument per output line; the text also shows the effective settings.
				  printf '%s\n' \
				    "用法:" \
				    "  $0 run [LABEL]              # 跑全部测试,默认 LABEL=baseline" \
				    "  $0 diff LABEL_A LABEL_B     # 比对两次输出" \
				    "" \
				    "示例:" \
				    "  # 部署前" \
				    "  $0 run baseline" \
				    "  # (你部署新版本)" \
				    "  $0 run verify" \
				    "  # 比对差异" \
				    "  $0 diff baseline verify" \
				    "" \
				    "环境变量:" \
				    "  BASE_URL=$BASE_URL" \
				    "  RESULTS_DIR=$RESULTS_DIR"
				  exit 1
				fi