|
|
@@ -0,0 +1,253 @@
|
|
|
+#!/usr/bin/env bash
|
|
|
+# recall_regression_test.sh
|
|
|
+#
|
|
|
+# 召回测试接口 — 部署前后回归对比脚本
|
|
|
+#
|
|
|
+# 用法:
|
|
|
+# ./recall_regression_test.sh run baseline # 部署前: 跑全部用例,存到 results/baseline/
|
|
|
+# ./recall_regression_test.sh run verify # 部署后: 跑同样用例,存到 results/verify/
|
|
|
+# ./recall_regression_test.sh diff baseline verify # 比对两次输出
|
|
|
+#
|
|
|
+# 可选环境变量:
|
|
|
+# BASE_URL 默认 https://api-internal.piaoquantv.com/videoVector
|
|
|
+# RESULTS_DIR 默认 <脚本所在目录>/recall_test_results (与当前工作目录无关)
|
|
|
+#
|
|
|
+# 依赖: curl, python3 (用于 json 格式化)
|
|
|
+
|
|
|
set -euo pipefail

# Endpoint prefix prepended to every request path; override via environment.
BASE_URL="${BASE_URL:-https://api-internal.piaoquantv.com/videoVector}"
# Root directory for captured responses. Defaults to a recall_test_results
# folder next to this script. BASH_SOURCE is used instead of $0 so the
# location stays correct however the script is invoked.
RESULTS_DIR="${RESULTS_DIR:-$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)/recall_test_results}"
readonly BASE_URL RESULTS_DIR

# ===== Test inputs =====
# Real video ID (confirmed by the user).
readonly VIDEO_ID_PRIMARY=64632804
# Fallback ID (taken from an actual prod matchByText response).
readonly VIDEO_ID_SECONDARY=67688956

# Query strings for text recall.
QUERY_TEXTS=(
  "夏季减肥小妙招"
  "宝宝辅食做法"
  "婆婆和儿媳"
)
readonly QUERY_TEXTS

# Config codes to exercise.
CONFIG_CODES=(VIDEO_TOPIC VIDEO_INSPIRATION)
readonly CONFIG_CODES

# Top-N (fixed, so result-size differences cannot come from the parameter).
readonly TOP_N=10
|
|
|
+
|
|
|
# ===== Utility functions =====

# Writes one progress line to stderr, prefixed with the current wall-clock
# time as [HH:MM:SS]. All arguments are joined with spaces.
# printf '%s' keeps backslashes and '%' in the message literal regardless of
# the shell's echo settings.
log() {
  printf '[%s] %s\n' "$(date +%H:%M:%S)" "$*" >&2
}
|
|
|
+
|
|
|
# Pretty-prints and normalises JSON (sorted keys, unescaped UTF-8, 2-space
# indent) so that two captures of the same payload diff cleanly.
# Input:   JSON document on stdin, encoded as UTF-8.
# Output:  normalised JSON on stdout, always UTF-8.
# Returns: 0 on success; 1 (with a message on stderr) if stdin is not valid
#          UTF-8 JSON.
# Bytes are read and written through the binary stream layer so the result
# does not depend on the caller's locale or PYTHONIOENCODING.
prettify() {
  python3 -c '
import json
import sys

try:
    parsed = json.loads(sys.stdin.buffer.read().decode("utf-8"))
    rendered = json.dumps(parsed, indent=2, ensure_ascii=False, sort_keys=True)
    sys.stdout.buffer.write((rendered + "\n").encode("utf-8"))
except Exception as e:
    sys.stderr.write("JSON parse failed: %s\n" % e)
    sys.exit(1)
'
}
|
|
|
+
|
|
|
# Runs curl with the given arguments and stores the outcome as
# <out_dir>/<name>.json. Shared by do_get and do_post.
# Arguments: $1 - case name (used as the file stem)
#            $2 - output directory
#            $3.. - arguments passed to curl after -sS
# Outcomes:
#   curl ok, body is JSON   -> normalised JSON (via prettify)
#   curl ok, body not JSON  -> raw body stored unchanged under the .json name,
#                              so that diff mode still compares it
#   curl failed             -> curl's stderr text stored under the .json name
# The temporary <name>.raw.txt / <name>.raw.txt.err files are removed in
# every case.
_fetch_to_file() {
  local name="$1"
  local out_dir="$2"
  shift 2
  local raw="$out_dir/${name}.raw.txt"
  local pretty="$out_dir/${name}.json"

  if curl -sS "$@" >"$raw" 2>"$raw.err"; then
    # prettify's own diagnostics are discarded on purpose: the WARN line
    # below is the user-facing signal.
    if prettify <"$raw" >"$pretty" 2>/dev/null; then
      rm -f -- "$raw" "$raw.err"
    else
      log " WARN: $name 返回非 JSON,原始响应原样保存到 ${name}.json"
      mv -- "$raw" "$pretty"
      rm -f -- "$raw.err"
    fi
  else
    log " ERROR: $name curl 失败"
    mv -- "$raw.err" "$pretty"
    rm -f -- "$raw"
  fi
}

# Issues a GET request (30 s timeout) against ${BASE_URL}<path>.
# Arguments: $1 - case name, $2 - request path incl. query string,
#            $3 - output directory
do_get() {
  local name="$1"
  shift
  local path="$1"
  shift
  local out_dir="$1"
  shift

  log "GET $name -> $path"
  _fetch_to_file "$name" "$out_dir" --max-time 30 "${BASE_URL}${path}"
}

# Issues a POST request with a JSON body (60 s timeout) against
# ${BASE_URL}<path>.
# Arguments: $1 - case name, $2 - request path, $3 - JSON request body,
#            $4 - output directory
do_post() {
  local name="$1"
  shift
  local path="$1"
  shift
  local body="$1"
  shift
  local out_dir="$1"
  shift

  log "POST $name -> $path body=$body"
  _fetch_to_file "$name" "$out_dir" --max-time 60 -X POST \
    -H "Content-Type: application/json" \
    -d "$body" \
    "${BASE_URL}${path}"
}
|
|
|
+
|
|
|
# ===== run mode =====

# Escapes a string so it can be placed between double quotes in a JSON
# document. Handles backslash, double quote, newline, carriage return and
# tab; other control characters are passed through unchanged.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
_json_escape() {
  local raw="$1" escaped="" ch i
  for ((i = 0; i < ${#raw}; i++)); do
    ch="${raw:i:1}"
    case "$ch" in
      '"') escaped+='\"' ;;
      '\') escaped+='\\' ;;
      $'\n') escaped+='\n' ;;
      $'\r') escaped+='\r' ;;
      $'\t') escaped+='\t' ;;
      *) escaped+="$ch" ;;
    esac
  done
  printf '%s' "$escaped"
}

# Runs every test case once and stores each response as
# $RESULTS_DIR/<label>/<case>.json.
# Globals:   RESULTS_DIR, BASE_URL, VIDEO_ID_PRIMARY, VIDEO_ID_SECONDARY,
#            QUERY_TEXTS, CONFIG_CODES, TOP_N (all read)
# Arguments: $1 - label naming the output sub-directory
# Returns:   1 if curl or python3 is not available; otherwise the status of
#            the last command.
run_tests() {
  local label="$1"
  local out_dir="$RESULTS_DIR/$label"

  # Fail fast: without python3 every response would be stored unnormalised,
  # and without curl every case would record an error.
  local tool
  for tool in curl python3; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      log "ERROR: 缺少依赖命令: $tool"
      return 1
    fi
  done

  mkdir -p -- "$out_dir"

  log "==> 输出目录: $out_dir"
  log "==> BASE_URL: $BASE_URL"

  # --- videoDetail ---
  do_get "videoDetail__primary" "/recallTest/videoDetail?videoId=$VIDEO_ID_PRIMARY" "$out_dir"
  do_get "videoDetail__secondary" "/recallTest/videoDetail?videoId=$VIDEO_ID_SECONDARY" "$out_dir"
  do_get "videoDetail__missing" "/recallTest/videoDetail?videoId=1" "$out_dir"

  # --- aiUnderstanding ---
  do_get "aiUnderstanding__primary" "/recallTest/aiUnderstanding?videoId=$VIDEO_ID_PRIMARY" "$out_dir"

  # --- deconstructPoints ---
  do_get "deconstructPoints__primary" "/recallTest/deconstructPoints?videoId=$VIDEO_ID_PRIMARY" "$out_dir"
  do_get "deconstructPoints__secondary" "/recallTest/deconstructPoints?videoId=$VIDEO_ID_SECONDARY" "$out_dir"

  # --- matchByText x (queryText x configCode) ---
  # Loop variables are local so they do not leak into the caller's scope.
  local idx=0 qt cc qt_json
  for qt in "${QUERY_TEXTS[@]}"; do
    idx=$((idx + 1))
    # Escape once per query so quotes/backslashes cannot break the body.
    qt_json="$(_json_escape "$qt")"
    for cc in "${CONFIG_CODES[@]}"; do
      do_post \
        "matchByText__q${idx}__${cc}" \
        "/recallTest/matchByText" \
        "{\"queryText\":\"${qt_json}\",\"configCode\":\"${cc}\",\"topN\":${TOP_N}}" \
        "$out_dir"
    done
    # Default configCode (field omitted).
    do_post \
      "matchByText__q${idx}__DEFAULT" \
      "/recallTest/matchByText" \
      "{\"queryText\":\"${qt_json}\",\"topN\":${TOP_N}}" \
      "$out_dir"
  done

  # --- matchByText edge case: empty text ---
  do_post "matchByText__empty" "/recallTest/matchByText" \
    "{\"queryText\":\"\",\"configCode\":\"VIDEO_TOPIC\",\"topN\":${TOP_N}}" "$out_dir"

  # --- matchByVideoId x configCode ---
  for cc in "${CONFIG_CODES[@]}"; do
    do_post \
      "matchByVideoId__primary__${cc}" \
      "/recallTest/matchByVideoId" \
      "{\"videoId\":${VIDEO_ID_PRIMARY},\"configCode\":\"${cc}\",\"topN\":${TOP_N}}" \
      "$out_dir"
    do_post \
      "matchByVideoId__secondary__${cc}" \
      "/recallTest/matchByVideoId" \
      "{\"videoId\":${VIDEO_ID_SECONDARY},\"configCode\":\"${cc}\",\"topN\":${TOP_N}}" \
      "$out_dir"
  done
  # Default configCode.
  do_post \
    "matchByVideoId__primary__DEFAULT" \
    "/recallTest/matchByVideoId" \
    "{\"videoId\":${VIDEO_ID_PRIMARY},\"topN\":${TOP_N}}" \
    "$out_dir"

  # --- matchByVideoId edge case: non-existent ID ---
  do_post "matchByVideoId__missing" "/recallTest/matchByVideoId" \
    "{\"videoId\":1,\"configCode\":\"VIDEO_TOPIC\",\"topN\":${TOP_N}}" "$out_dir"

  # Count directory entries with a glob instead of parsing ls output.
  local entry file_count=0
  for entry in "$out_dir"/*; do
    if [[ -e "$entry" ]]; then
      file_count=$((file_count + 1))
    fi
  done
  log "==> 完成,共生成 ${file_count} 个文件"
  log "==> 路径: $out_dir"
}
|
|
|
+
|
|
|
# ===== diff mode =====

# Prints the base names of the *.json entries in a directory, sorted, one
# per line. Prints nothing (and still succeeds) when there are none, so it
# is safe under `set -euo pipefail`.
# Arguments: $1 - directory to list
_list_json_names() {
  local dir="$1" path
  for path in "$dir"/*.json; do
    # An unmatched glob stays literal; skip it instead of reporting a file.
    [[ -e "$path" || -L "$path" ]] || continue
    printf '%s\n' "${path##*/}"
  done | sort
}

# Compares the captured responses of two runs and prints a report to stdout:
# a file-list diff if the sets of *.json names differ, a unified diff for
# every file present in both runs whose content differs, and a final summary.
# Globals:   RESULTS_DIR (read)
# Arguments: $1 - label of the first run, $2 - label of the second run
# Exits:     2 if either run directory does not exist.
# Returns:   0 otherwise, whether or not differences were found.
diff_results() {
  local a="$1"
  local b="$2"
  local dir_a="$RESULTS_DIR/$a"
  local dir_b="$RESULTS_DIR/$b"

  if [[ ! -d "$dir_a" ]]; then
    echo "目录不存在: $dir_a" >&2
    exit 2
  fi
  if [[ ! -d "$dir_b" ]]; then
    echo "目录不存在: $dir_b" >&2
    exit 2
  fi

  echo "==> 比对 $dir_a vs $dir_b"
  echo

  local files_a files_b
  files_a="$(_list_json_names "$dir_a")"
  files_b="$(_list_json_names "$dir_b")"

  if [[ "$files_a" != "$files_b" ]]; then
    echo "!! 文件清单不一致:"
    # diff exits 1 when the lists differ; that is the expected case here.
    diff <(printf '%s\n' "$files_a") <(printf '%s\n' "$files_b") || true
    echo
  fi

  local total=0 changed=0 same=0 f
  # Read names line by line so names containing spaces are kept intact.
  while IFS= read -r f; do
    [[ -n "$f" ]] || continue
    # Files missing from the second run were already reported above.
    [[ -f "$dir_b/$f" ]] || continue
    total=$((total + 1))
    if diff -q -- "$dir_a/$f" "$dir_b/$f" >/dev/null 2>&1; then
      same=$((same + 1))
    else
      changed=$((changed + 1))
      echo "----- DIFF: $f -----"
      diff -u -- "$dir_a/$f" "$dir_b/$f" || true
      echo
    fi
  done <<<"$files_a"

  echo "==> 统计: 共 $total 个文件, 一致 $same, 有差异 $changed"
}
|
|
|
+
|
|
|
# ===== Entry point =====

# Prints the usage text, including the effective BASE_URL and RESULTS_DIR,
# to stdout.
print_usage() {
  cat <<EOF
用法:
  $0 run [LABEL] # 跑全部测试,默认 LABEL=baseline
  $0 diff LABEL_A LABEL_B # 比对两次输出

示例:
  # 部署前
  $0 run baseline
  # (你部署新版本)
  $0 run verify
  # 比对差异
  $0 diff baseline verify

环境变量:
  BASE_URL=$BASE_URL
  RESULTS_DIR=$RESULTS_DIR
EOF
}

# Dispatch on the first argument:
#   run [LABEL]        capture all cases (LABEL defaults to "baseline")
#   diff [A] [B]       compare two runs (default "baseline" vs "verify")
#   -h | --help | help print usage to stdout and exit 0
#   anything else      print usage to stderr and exit 1
cmd="${1:-}"
case "$cmd" in
  run)
    label="${2:-baseline}"
    run_tests "$label"
    ;;
  diff)
    a="${2:-baseline}"
    b="${3:-verify}"
    diff_results "$a" "$b"
    ;;
  -h | --help | help)
    print_usage
    exit 0
    ;;
  *)
    # A missing command just shows usage; an unrecognised one is named first.
    if [[ -n "$cmd" ]]; then
      printf '未知命令: %s\n\n' "$cmd" >&2
    fi
    print_usage >&2
    exit 1
    ;;
esac
|