version_investigate.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. #!/usr/bin/env python3
  2. """
  3. Step 1: 查看当前 DB version 分布,为 dev_dedup / dev_abstract 版本化做准备。
  4. 不改数据,只 SELECT。
  5. """
  6. import sys
  7. from pathlib import Path
  8. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  9. from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore
  10. def show(cur, title, sql, params=None):
  11. print(f'\n=== {title} ===', flush=True)
  12. cur.execute(sql, params or ())
  13. rows = cur.fetchall()
  14. if not rows:
  15. print(' (无数据)', flush=True); return
  16. cols = list(rows[0].keys())
  17. print(' ' + ' | '.join(cols), flush=True)
  18. for r in rows:
  19. print(' ' + ' | '.join(str(r[c]) for c in cols), flush=True)
  20. def main():
  21. s = PostgreSQLCapabilityStore()
  22. cur = s._get_cursor()
  23. try:
  24. cur.execute("SET statement_timeout = '30s'")
  25. # 主表版本分布
  26. for tbl in ['capability', 'strategy', 'knowledge', 'resource']:
  27. show(cur, f'{tbl} version 分布',
  28. f'SELECT version, COUNT(*) n FROM {tbl} GROUP BY version ORDER BY n DESC')
  29. # requirement 是否有 version 列
  30. show(cur, 'requirement 表列结构',
  31. """SELECT column_name, data_type FROM information_schema.columns
  32. WHERE table_name='requirement' ORDER BY ordinal_position""")
  33. # junction 表行数
  34. for tbl in ['requirement_strategy', 'requirement_knowledge', 'requirement_capability',
  35. 'requirement_resource', 'strategy_capability', 'strategy_resource',
  36. 'strategy_knowledge', 'knowledge_capability', 'knowledge_resource']:
  37. cur.execute(f'SELECT COUNT(*) c FROM {tbl}')
  38. print(f' {tbl}: {cur.fetchone()["c"]}', flush=True)
  39. # 备份表
  40. print('\n=== bk_20260422_* 备份表 ===', flush=True)
  41. cur.execute("""SELECT table_name FROM information_schema.tables
  42. WHERE table_name LIKE 'bk_20260422_%' ORDER BY table_name""")
  43. for r in cur.fetchall():
  44. cur.execute(f'SELECT COUNT(*) c FROM {r["table_name"]}')
  45. print(f' {r["table_name"]}: {cur.fetchone()["c"]}', flush=True)
  46. # 备份 strategy 里的字段(确认有 version 列以便恢复)
  47. show(cur, 'bk_20260422_strategy 列结构',
  48. """SELECT column_name, data_type FROM information_schema.columns
  49. WHERE table_name='bk_20260422_strategy' ORDER BY ordinal_position""")
  50. # 现在 strategy 表里 version 为 howard_strategy_instance 的有多少
  51. show(cur, 'strategy version=howard_strategy_instance 的条目示例',
  52. """SELECT id, name, version FROM strategy
  53. WHERE version='howard_strategy_instance' LIMIT 3""")
  54. # knowledge v0 示例
  55. show(cur, 'knowledge version=v0 示例',
  56. """SELECT id, version FROM knowledge WHERE version='v0' LIMIT 3""")
  57. # capability 现在的 version(可能是 NULL 或某默认值)
  58. show(cur, 'capability sample',
  59. """SELECT id, name, version FROM capability LIMIT 3""")
  60. # resource 现在的 version
  61. show(cur, 'resource sample',
  62. """SELECT id, version FROM resource LIMIT 3""")
  63. finally:
  64. cur.close(); s.close()
  65. if __name__ == '__main__':
  66. main()