version_step1_rename.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. #!/usr/bin/env python3
  2. """
  3. Step 1: Rename 现有 version 标签到新命名方案。
  4. - capability.version: 'howard_dedup' → 'dev_abstract' (332)
  5. - resource.version: 'howard_dedup' → 'dev_abstract' (2539)
  6. - strategy.version: 'howard_strategy_instance' → 'dev_abstract' (26)
  7. - knowledge.version: 'howard_strategy_instance' → 'dev_abstract' (193); v0 保持不变
  8. requirement 不动(跨版本共享)。
  9. """
  10. import sys, time
  11. from pathlib import Path
  12. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  13. from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore
  14. RENAMES = [
  15. ('capability', 'howard_dedup', 'dev_abstract'),
  16. ('resource', 'howard_dedup', 'dev_abstract'),
  17. ('strategy', 'howard_strategy_instance', 'dev_abstract'),
  18. ('knowledge', 'howard_strategy_instance', 'dev_abstract'),
  19. ]
  20. def main():
  21. s = PostgreSQLCapabilityStore()
  22. cur = s._get_cursor()
  23. try:
  24. cur.execute("SET statement_timeout = '60s'")
  25. # kill idle-in-tx(防卡锁)
  26. cur.execute("""SELECT pid FROM pg_stat_activity WHERE state='idle in transaction'
  27. AND pid!=pg_backend_pid() AND datname=current_database()""")
  28. for r in cur.fetchall():
  29. pid = r['pid']
  30. print(f'kill idle-in-tx pid={pid}', flush=True)
  31. cur.execute("SELECT pg_terminate_backend(%s)", (pid,))
  32. for tbl, old, new in RENAMES:
  33. t0 = time.time()
  34. cur.execute(f"SELECT COUNT(*) c FROM {tbl} WHERE version=%s", (old,))
  35. before = cur.fetchone()['c']
  36. cur.execute(f"UPDATE {tbl} SET version=%s WHERE version=%s", (new, old))
  37. cur.execute(f"SELECT COUNT(*) c FROM {tbl} WHERE version=%s", (new,))
  38. after = cur.fetchone()['c']
  39. print(f'[{time.time()-t0:.1f}s] {tbl}: {old}→{new} before={before} after(new)={after}', flush=True)
  40. print('\n=== 全表 version 分布(验证)===', flush=True)
  41. for tbl in ['capability', 'resource', 'strategy', 'knowledge', 'requirement']:
  42. cur.execute(f"SELECT version, COUNT(*) n FROM {tbl} GROUP BY version ORDER BY n DESC")
  43. parts = [f'{r["version"]}={r["n"]}' for r in cur.fetchall()]
  44. print(f' {tbl}: {", ".join(parts)}', flush=True)
  45. finally:
  46. cur.close(); s.close()
  47. if __name__ == '__main__':
  48. main()