#!/usr/bin/env python3
"""Rank long-form article records by play-through weight.

Reads long-form article JSON, computes
``weight = 分发realplay_uv / 当日分发曝光uv`` for every record, and writes
the records sorted by weight in descending order.
Each input file produces its own output file (outputs are never merged).
"""

from __future__ import annotations

import argparse
import json
import math
from pathlib import Path

# Keys read from each record's ``ext_data`` mapping.
_EXPOSE_KEY = "当日分发曝光uv"
_REALPLAY_KEY = "分发realplay_uv"
# Key under which the computed weight is stored in each output row.
_WEIGHT_KEY = "权重值"


def load_records(path: Path) -> list[dict]:
    """Load one JSON file and compute a weight for every usable record.

    A record is skipped (not an error) when it is not a JSON object, when
    its ``ext_data`` is not a mapping, or when either metric is missing or
    cannot be converted to ``float``.

    Args:
        path: Path of a UTF-8 JSON file whose root node is an array.

    Returns:
        One dict per usable record with the keys ``videoid`` (always a
        string), ``二级品类`` and ``权重值``.  The weight is NaN when the
        exposure count is zero or negative.

    Raises:
        ValueError: If the JSON root node is not an array.
    """
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    if not isinstance(data, list):
        raise ValueError(f"{path} 根节点应为数组")

    rows: list[dict] = []
    for item in data:
        # Malformed elements are skipped like records with missing metrics,
        # instead of aborting the whole file with an AttributeError.
        if not isinstance(item, dict):
            continue
        ext = item.get("ext_data") or {}
        if not isinstance(ext, dict):
            continue

        expose = ext.get(_EXPOSE_KEY)
        realplay = ext.get(_REALPLAY_KEY)
        if expose is None or realplay is None:
            continue
        try:
            expose_f = float(expose)
            realplay_f = float(realplay)
        except (TypeError, ValueError):
            continue

        # A non-positive exposure makes the ratio meaningless; NaN marks it
        # so that such rows are ranked last and printed as "nan".
        weight = realplay_f / expose_f if expose_f > 0 else float("nan")
        rows.append(
            {
                "videoid": str(item.get("videoid", "")),
                "二级品类": item.get("二级品类", ""),
                _WEIGHT_KEY: weight,
            }
        )
    return rows


def _sort_key(r: dict) -> tuple:
    """Return a sort key: descending weight, with NaN weights placed last."""
    w = r[_WEIGHT_KEY]
    if math.isnan(w):
        return (1, 0.0)
    return (0, -w)


def _format_row(r: dict) -> str:
    """Render one row as ``videoid<TAB>二级品类<TAB>weight``."""
    w = r[_WEIGHT_KEY]
    w_str = "nan" if math.isnan(w) else f"{w:.6f}"
    return f"{r['videoid']}\t{r['二级品类']}\t{w_str}"


def default_out_path(inp: Path, out_dir: Path | None) -> Path:
    """Return the output path for *inp*.

    The file name is ``{input stem}_weight_rank.txt``.  It is placed in
    *out_dir* when one is given, otherwise next to the input file.
    """
    name = f"{inp.stem}_weight_rank.txt"
    if out_dir is not None:
        return out_dir / name
    return inp.parent / name


def main() -> None:
    """Parse the command line and write one ranking file per input.

    Raises:
        SystemExit: If an input file does not exist, or if two different
            inputs would be written to the same output file.
    """
    base = Path(__file__).resolve().parent / "data" / "changwen_data"
    parser = argparse.ArgumentParser(
        description="长文数据按 分发realplay_uv/当日分发曝光uv 排序;每个输入单独写一个输出文件"
    )
    parser.add_argument(
        "inputs",
        nargs="*",
        type=Path,
        default=[
            base / "奇观妙技有乾坤.json",
            base / "青史铁事漫谈.json",
        ],
        help="输入 JSON 路径(默认两个账号文件)",
    )
    parser.add_argument(
        "--out-dir",
        type=Path,
        default=None,
        help="可选:把所有输出写到该目录(文件名仍为 {原文件名去扩展}_weight_rank.txt)",
    )
    args = parser.parse_args()

    paths = [p.resolve() for p in args.inputs]
    for p in paths:
        if not p.is_file():
            raise SystemExit(f"文件不存在: {p}")

    out_dir = args.out_dir.resolve() if args.out_dir is not None else None

    # With --out-dir, inputs that share a stem but live in different
    # directories map to the same output file.  Refuse up front rather than
    # let the later input silently overwrite the earlier one's result.
    claimed: dict[Path, Path] = {}
    for p in paths:
        out = default_out_path(p, out_dir)
        owner = claimed.setdefault(out, p)
        if owner != p:
            raise SystemExit(f"输出文件冲突: {owner} 与 {p} 都会写入 {out}")

    if out_dir is not None:
        out_dir.mkdir(parents=True, exist_ok=True)

    for p in paths:
        rows = sorted(load_records(p), key=_sort_key)
        lines = [_format_row(r) for r in rows]

        out = default_out_path(p, out_dir)
        if out_dir is None:
            out.parent.mkdir(parents=True, exist_ok=True)
        # Every line is newline-terminated; an empty result is an empty file.
        text = "\n".join(lines) + ("\n" if lines else "")
        out.write_text(text, encoding="utf-8")
        print(f"{p.name}: 共 {len(lines)} 行 -> {out}")


if __name__ == "__main__":
    main()