#!/usr/bin/env bash
# Source location (from page breadcrumb): Server / video-vector-server
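# recall_regression_test.sh
#
# Recall test API - regression comparison script for before/after a deployment
#
# Usage:
#   ./recall_regression_test.sh run baseline       # before deploy: run all cases, save to <RESULTS_DIR>/baseline/
#   ./recall_regression_test.sh run verify         # after deploy: run the same cases, save to <RESULTS_DIR>/verify/
#   ./recall_regression_test.sh diff baseline verify   # compare the two result sets
#
# Optional environment variables:
#   BASE_URL      default: https://api-internal.piaoquantv.com/videoVector
#   RESULTS_DIR   default: <directory containing this script>/recall_test_results
#
# Dependencies: curl, python3 (for JSON formatting), diff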

# Strict mode: abort on errors, on unset variables and on failing pipelines.
set -o errexit -o nounset -o pipefail

# Both settings can be overridden from the environment; an empty value counts
# as "not set" and falls back to the default.
: "${BASE_URL:=https://api-internal.piaoquantv.com/videoVector}"
if [[ -z "${RESULTS_DIR:-}" ]]; then
  # Default: a recall_test_results directory next to this script.
  RESULTS_DIR="$(cd "$(dirname "$0")" && pwd)/recall_test_results"
fi

# ===== Test inputs =====
# Real video ID (confirmed by the user)
VIDEO_ID_PRIMARY=64632804
# Fallback ID (taken from an actual prod matchByText response)
VIDEO_ID_SECONDARY=67688956

# Query texts for text-based recall
QUERY_TEXTS=("夏季减肥小妙招" "宝宝辅食做法" "婆婆和儿媳")

# Config codes
CONFIG_CODES=(
  VIDEO_TOPIC
  VIDEO_INSPIRATION
)

# Top-N (fixed, so result sizes never differ because of the parameter)
TOP_N=10

# ===== Helper functions =====
# Write a timestamped ([HH:MM:SS]) message to stderr, keeping stdout free for
# data. All arguments are joined into one message.
log() {
  printf '[%s] %s\n' "$(date +%H:%M:%S)" "$*" >&2
}

# Normalize a JSON document so that two runs produce stable, diff-friendly
# text: keys sorted, fixed 2-space indent, non-ASCII characters written
# verbatim (not \u-escaped).
#
# Inputs:  stdin  - JSON text
# Outputs: stdout - normalized JSON
#          stderr - "JSON parse failed: ..." when the input cannot be processed
# Returns: 0 on success, 1 on any failure while reading, parsing or writing
prettify() {
  # Pin stdin/stdout to UTF-8. ensure_ascii=False writes non-ASCII text as-is,
  # so without this the bytes read and written depend on the caller's locale
  # and a non-UTF-8 locale can fail with a UnicodeEncodeError.
  PYTHONIOENCODING=utf-8 python3 -c '
import sys, json
try:
    d = json.load(sys.stdin)
    print(json.dumps(d, indent=2, ensure_ascii=False, sort_keys=True))
except Exception as e:
    sys.stderr.write("JSON parse failed: %s\n" % e)
    sys.exit(1)
'
}

# Issue a GET request and record the outcome as <out_dir>/<name>.json.
#
# Arguments:
#   $1 - case name, used as the output file stem
#   $2 - request path (including query string), appended to BASE_URL
#   $3 - directory that receives the result file
# Outputs:
#   <out_dir>/<name>.json holds one of: the prettified JSON response, the raw
#   response body when prettify rejects it, or curl's stderr when curl itself
#   fails. The intermediate .raw.txt / .raw.txt.err files are removed.
do_get() {
  local name=$1 path=$2 out_dir=$3
  local body_file="${out_dir}/${name}.raw.txt"
  local err_file="${body_file}.err"
  local json_file="${out_dir}/${name}.json"

  log "GET  $name -> $path"

  # Transport failure: keep curl's error text as the recorded result.
  if ! curl -sS --max-time 30 "${BASE_URL}${path}" > "$body_file" 2> "$err_file"; then
    log "  ERROR: $name curl 失败"
    mv "$err_file" "$json_file"
    rm -f "$body_file"
    return
  fi

  # Normal case: store the normalized JSON and drop the intermediates.
  if prettify < "$body_file" > "$json_file" 2>/dev/null; then
    rm -f "$body_file" "$err_file"
    return
  fi

  # Body is not JSON: record it verbatim under the .json name.
  log "  WARN: $name 返回非 JSON,保留 .raw.txt"
  mv "$body_file" "$json_file"
  rm -f "$err_file"
}

# Issue a JSON POST request and record the outcome as <out_dir>/<name>.json.
#
# Arguments:
#   $1 - case name, used as the output file stem
#   $2 - request path, appended to BASE_URL
#   $3 - request body (sent with Content-Type: application/json)
#   $4 - directory that receives the result file
# Outputs:
#   <out_dir>/<name>.json holds one of: the prettified JSON response, the raw
#   response body when prettify rejects it, or curl's stderr when curl itself
#   fails. The intermediate .raw.txt / .raw.txt.err files are removed.
do_post() {
  local name=$1 path=$2 body=$3 out_dir=$4
  local body_file="${out_dir}/${name}.raw.txt"
  local err_file="${body_file}.err"
  local json_file="${out_dir}/${name}.json"
  local -a request=(
    -sS --max-time 60 -X POST
    -H "Content-Type: application/json"
    -d "$body"
    "${BASE_URL}${path}"
  )

  log "POST $name -> $path  body=$body"

  # Transport failure: keep curl's error text as the recorded result.
  if ! curl "${request[@]}" > "$body_file" 2> "$err_file"; then
    log "  ERROR: $name curl 失败"
    mv "$err_file" "$json_file"
    rm -f "$body_file"
    return
  fi

  # Normal case: store the normalized JSON and drop the intermediates.
  if prettify < "$body_file" > "$json_file" 2>/dev/null; then
    rm -f "$body_file" "$err_file"
    return
  fi

  # Body is not JSON: record it verbatim under the .json name.
  log "  WARN: $name 返回非 JSON,保留 .raw.txt"
  mv "$body_file" "$json_file"
  rm -f "$err_file"
}

# ===== run mode =====
#######################################
# Escape text for embedding inside a JSON string literal.
# Only backslash and double quote are escaped; control characters (including
# newlines) are NOT handled.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout
#######################################
_json_escape() {
  # LC_ALL=C: byte-wise processing, so multi-byte UTF-8 text passes untouched.
  printf '%s' "$1" | LC_ALL=C sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
}

#######################################
# Run every recall test case and store one result file per case in
# $RESULTS_DIR/<label>/ (created if missing).
# Globals:   RESULTS_DIR, BASE_URL, VIDEO_ID_PRIMARY, VIDEO_ID_SECONDARY,
#            QUERY_TEXTS, CONFIG_CODES, TOP_N (all read)
# Arguments: $1 - label of this run, used as the sub-directory name
# Outputs:   progress messages via log; result files via do_get / do_post
#######################################
run_tests() {
  local label="$1"
  local out_dir="$RESULTS_DIR/$label"
  # Loop variables are local so they do not leak into the caller's scope.
  local qt cc qt_json body file_count
  local idx=0
  mkdir -p "$out_dir"

  log "==> 输出目录: $out_dir"
  log "==> BASE_URL: $BASE_URL"

  # --- videoDetail ---
  do_get "videoDetail__primary"   "/recallTest/videoDetail?videoId=$VIDEO_ID_PRIMARY"   "$out_dir"
  do_get "videoDetail__secondary" "/recallTest/videoDetail?videoId=$VIDEO_ID_SECONDARY" "$out_dir"
  do_get "videoDetail__missing"   "/recallTest/videoDetail?videoId=1"                   "$out_dir"

  # --- aiUnderstanding ---
  do_get "aiUnderstanding__primary" "/recallTest/aiUnderstanding?videoId=$VIDEO_ID_PRIMARY" "$out_dir"

  # --- deconstructPoints ---
  do_get "deconstructPoints__primary"   "/recallTest/deconstructPoints?videoId=$VIDEO_ID_PRIMARY"   "$out_dir"
  do_get "deconstructPoints__secondary" "/recallTest/deconstructPoints?videoId=$VIDEO_ID_SECONDARY" "$out_dir"

  # --- matchByText: every query text x every config code ---
  for qt in "${QUERY_TEXTS[@]}"; do
    idx=$((idx+1))
    # Escape once per query so quotes/backslashes cannot break the JSON body.
    qt_json=$(_json_escape "$qt")
    for cc in "${CONFIG_CODES[@]}"; do
      printf -v body '{"queryText":"%s","configCode":"%s","topN":%s}' \
        "$qt_json" "$cc" "$TOP_N"
      do_post "matchByText__q${idx}__${cc}" "/recallTest/matchByText" "$body" "$out_dir"
    done
    # Default configCode (field omitted)
    printf -v body '{"queryText":"%s","topN":%s}' "$qt_json" "$TOP_N"
    do_post "matchByText__q${idx}__DEFAULT" "/recallTest/matchByText" "$body" "$out_dir"
  done

  # --- matchByText edge case: empty text ---
  printf -v body '{"queryText":"","configCode":"VIDEO_TOPIC","topN":%s}' "$TOP_N"
  do_post "matchByText__empty" "/recallTest/matchByText" "$body" "$out_dir"

  # --- matchByVideoId: both video IDs x every config code ---
  for cc in "${CONFIG_CODES[@]}"; do
    printf -v body '{"videoId":%s,"configCode":"%s","topN":%s}' \
      "$VIDEO_ID_PRIMARY" "$cc" "$TOP_N"
    do_post "matchByVideoId__primary__${cc}" "/recallTest/matchByVideoId" "$body" "$out_dir"
    printf -v body '{"videoId":%s,"configCode":"%s","topN":%s}' \
      "$VIDEO_ID_SECONDARY" "$cc" "$TOP_N"
    do_post "matchByVideoId__secondary__${cc}" "/recallTest/matchByVideoId" "$body" "$out_dir"
  done
  # Default configCode (field omitted)
  printf -v body '{"videoId":%s,"topN":%s}' "$VIDEO_ID_PRIMARY" "$TOP_N"
  do_post "matchByVideoId__primary__DEFAULT" "/recallTest/matchByVideoId" "$body" "$out_dir"

  # --- matchByVideoId edge case: non-existent ID ---
  printf -v body '{"videoId":1,"configCode":"VIDEO_TOPIC","topN":%s}' "$TOP_N"
  do_post "matchByVideoId__missing" "/recallTest/matchByVideoId" "$body" "$out_dir"

  # Count directory entries with a glob instead of parsing ls output; the
  # subshell keeps nullglob and the positional parameters contained.
  file_count=$(shopt -s nullglob; set -- "$out_dir"/*; printf '%s' "$#")
  log "==> 完成,共生成 $file_count 个文件"
  log "==> 路径: $out_dir"
}

# ===== diff mode =====
#######################################
# Print the base names of the *.json entries directly inside a directory, one
# per line, in the shell's glob order. Prints nothing (and still succeeds)
# when the directory has no such entries.
# Arguments: $1 - directory to list
# Outputs:   file names on stdout
#######################################
_list_json_names() {
  local dir=$1 path
  for path in "$dir"/*.json; do
    # An unmatched glob stays literal; skip it instead of reporting a file.
    [[ -e "$path" || -L "$path" ]] || continue
    printf '%s\n' "${path##*/}"
  done
}

#######################################
# Compare the result files of two runs and print a report to stdout: the
# difference between the two file lists (if any), a unified diff for every
# file present in both runs whose content differs, and a summary line.
# Files present in only one run are not counted in the summary.
# Globals:   RESULTS_DIR (read)
# Arguments: $1 - label of the first run, $2 - label of the second run
# Returns:   0 after reporting; exits the script with status 2 when either
#            run directory does not exist
#######################################
diff_results() {
  local a="$1"
  local b="$2"
  local dir_a="$RESULTS_DIR/$a"
  local dir_b="$RESULTS_DIR/$b"
  local files_a files_b f
  local total=0 changed=0 same=0

  if [[ ! -d "$dir_a" ]]; then echo "目录不存在: $dir_a" >&2; exit 2; fi
  if [[ ! -d "$dir_b" ]]; then echo "目录不存在: $dir_b" >&2; exit 2; fi

  echo "==> 比对 $dir_a   vs   $dir_b"
  echo

  # Globbing (rather than `ls *.json | sort`) cannot fail on an empty
  # directory, so `set -e` + `pipefail` no longer abort the script silently.
  files_a=$(_list_json_names "$dir_a")
  files_b=$(_list_json_names "$dir_b")

  if [[ "$files_a" != "$files_b" ]]; then
    echo "!! 文件清单不一致:"
    # diff exits 1 when the lists differ; that is the expected case here.
    diff <(printf '%s\n' "$files_a") <(printf '%s\n' "$files_b") || true
    echo
  fi

  # Read names line by line so they are never word-split or glob-expanded.
  while IFS= read -r f; do
    [[ -n "$f" ]] || continue            # empty list yields one blank line
    [[ -f "$dir_b/$f" ]] || continue     # only compare files present in both
    total=$((total+1))
    if diff -q "$dir_a/$f" "$dir_b/$f" >/dev/null 2>&1; then
      same=$((same+1))
    else
      changed=$((changed+1))
      echo "----- DIFF: $f -----"
      diff -u "$dir_a/$f" "$dir_b/$f" || true
      echo
    fi
  done <<< "$files_a"

  echo "==> 统计: 共 $total 个文件, 一致 $same, 有差异 $changed"
}

# ===== Entry point =====
# Print the usage text, including the effective BASE_URL and RESULTS_DIR.
show_usage() {
  cat <<EOF
用法:
  $0 run [LABEL]              # 跑全部测试,默认 LABEL=baseline
  $0 diff LABEL_A LABEL_B     # 比对两次输出

示例:
  # 部署前
  $0 run baseline
  # (你部署新版本)
  $0 run verify
  # 比对差异
  $0 diff baseline verify

环境变量:
  BASE_URL=$BASE_URL
  RESULTS_DIR=$RESULTS_DIR
EOF
}

# Dispatch on the sub-command. Labels are optional: `run` defaults to
# "baseline", `diff` defaults to "baseline" vs "verify". Anything else prints
# the usage text and exits with status 1.
action="${1:-}"
if [[ "$action" == "run" ]]; then
  run_tests "${2:-baseline}"
elif [[ "$action" == "diff" ]]; then
  diff_results "${2:-baseline}" "${3:-verify}"
else
  show_usage
  exit 1
fi