| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980 |
- #!/usr/bin/env python3
- """
- 检查四张核心表的结构和数据量(knowledge 表只查结构,不查数据量避免卡住)
- """
- import os, psycopg2
- from psycopg2.extras import RealDictCursor
- from dotenv import load_dotenv
- _dir = os.path.dirname(os.path.abspath(__file__))
- _root = os.path.normpath(os.path.join(_dir, '..', '..'))
- load_dotenv(os.path.join(_root, '.env'))
- conn = psycopg2.connect(
- host=os.getenv('KNOWHUB_DB'),
- port=int(os.getenv('KNOWHUB_PORT', 5432)),
- user=os.getenv('KNOWHUB_USER'),
- password=os.getenv('KNOWHUB_PASSWORD'),
- database=os.getenv('KNOWHUB_DB_NAME'),
- connect_timeout=10
- )
- cur = conn.cursor(cursor_factory=RealDictCursor)
- print("Connected.\n")
- def get_columns(table):
- """用 pg_attribute 查列,比 information_schema 快得多"""
- cur.execute("""
- SELECT a.attname AS col, pg_catalog.format_type(a.atttypid, a.atttypmod) AS dtype
- FROM pg_catalog.pg_attribute a
- JOIN pg_catalog.pg_class c ON a.attrelid = c.oid
- JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
- WHERE c.relname = %s AND n.nspname = 'public'
- AND a.attnum > 0 AND NOT a.attisdropped
- ORDER BY a.attnum
- """, (table,))
- return cur.fetchall()
- def get_count(table):
- cur.execute(f"SELECT COUNT(*) as cnt FROM {table}")
- return cur.fetchone()['cnt']
- # ── 查结构 ──
- tables = ['atomic_capability', 'tool_table', 'requirement_table', 'knowledge']
- for t in tables:
- cols = get_columns(t)
- # knowledge 表可能很慢,只查结构不查行数
- if t == 'knowledge':
- print(f"\n=== {t} (skipped count) ===")
- else:
- cnt = get_count(t)
- print(f"\n=== {t} ({cnt} rows) ===")
- for c in cols:
- print(f" {c['col']:25s} {c['dtype']}")
- # ── 抽样 ──
- print("\n\n=== SAMPLE DATA ===")
- print("\natomic_capability (first 3):")
- cur.execute("SELECT id, name FROM atomic_capability LIMIT 3")
- for r in cur.fetchall():
- print(f" {r['id']}: {r['name']}")
- print("\nrequirement_table (first 3):")
- cur.execute("SELECT id, status FROM requirement_table LIMIT 3")
- for r in cur.fetchall():
- print(f" {r['id']}: {r.get('status', '?')}")
- print("\ntool_table (first 3):")
- cur.execute("SELECT id, name FROM tool_table LIMIT 3")
- for r in cur.fetchall():
- print(f" {r['id']}: {r['name']}")
- cur.close()
- conn.close()
- print("\nDone.")
|