check_table_structure.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. #!/usr/bin/env python3
  2. """
  3. 检查四张核心表的结构和数据量(knowledge 表只查结构,不查数据量避免卡住)
  4. """
  5. import os, psycopg2
  6. from psycopg2.extras import RealDictCursor
  7. from dotenv import load_dotenv
  8. _dir = os.path.dirname(os.path.abspath(__file__))
  9. _root = os.path.normpath(os.path.join(_dir, '..', '..'))
  10. load_dotenv(os.path.join(_root, '.env'))
  11. conn = psycopg2.connect(
  12. host=os.getenv('KNOWHUB_DB'),
  13. port=int(os.getenv('KNOWHUB_PORT', 5432)),
  14. user=os.getenv('KNOWHUB_USER'),
  15. password=os.getenv('KNOWHUB_PASSWORD'),
  16. database=os.getenv('KNOWHUB_DB_NAME'),
  17. connect_timeout=10
  18. )
  19. cur = conn.cursor(cursor_factory=RealDictCursor)
  20. print("Connected.\n")
  21. def get_columns(table):
  22. """用 pg_attribute 查列,比 information_schema 快得多"""
  23. cur.execute("""
  24. SELECT a.attname AS col, pg_catalog.format_type(a.atttypid, a.atttypmod) AS dtype
  25. FROM pg_catalog.pg_attribute a
  26. JOIN pg_catalog.pg_class c ON a.attrelid = c.oid
  27. JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
  28. WHERE c.relname = %s AND n.nspname = 'public'
  29. AND a.attnum > 0 AND NOT a.attisdropped
  30. ORDER BY a.attnum
  31. """, (table,))
  32. return cur.fetchall()
  33. def get_count(table):
  34. cur.execute(f"SELECT COUNT(*) as cnt FROM {table}")
  35. return cur.fetchone()['cnt']
  36. # ── 查结构 ──
  37. tables = ['atomic_capability', 'tool_table', 'requirement_table', 'knowledge']
  38. for t in tables:
  39. cols = get_columns(t)
  40. # knowledge 表可能很慢,只查结构不查行数
  41. if t == 'knowledge':
  42. print(f"\n=== {t} (skipped count) ===")
  43. else:
  44. cnt = get_count(t)
  45. print(f"\n=== {t} ({cnt} rows) ===")
  46. for c in cols:
  47. print(f" {c['col']:25s} {c['dtype']}")
  48. # ── 抽样 ──
  49. print("\n\n=== SAMPLE DATA ===")
  50. print("\natomic_capability (first 3):")
  51. cur.execute("SELECT id, name FROM atomic_capability LIMIT 3")
  52. for r in cur.fetchall():
  53. print(f" {r['id']}: {r['name']}")
  54. print("\nrequirement_table (first 3):")
  55. cur.execute("SELECT id, status FROM requirement_table LIMIT 3")
  56. for r in cur.fetchall():
  57. print(f" {r['id']}: {r.get('status', '?')}")
  58. print("\ntool_table (first 3):")
  59. cur.execute("SELECT id, name FROM tool_table LIMIT 3")
  60. for r in cur.fetchall():
  61. print(f" {r['id']}: {r['name']}")
  62. cur.close()
  63. conn.close()
  64. print("\nDone.")