#!/usr/bin/env python3 """ 检查四张核心表的结构和数据量(knowledge 表只查结构,不查数据量避免卡住) """ import os, psycopg2 from psycopg2.extras import RealDictCursor from dotenv import load_dotenv _dir = os.path.dirname(os.path.abspath(__file__)) _root = os.path.normpath(os.path.join(_dir, '..', '..')) load_dotenv(os.path.join(_root, '.env')) conn = psycopg2.connect( host=os.getenv('KNOWHUB_DB'), port=int(os.getenv('KNOWHUB_PORT', 5432)), user=os.getenv('KNOWHUB_USER'), password=os.getenv('KNOWHUB_PASSWORD'), database=os.getenv('KNOWHUB_DB_NAME'), connect_timeout=10 ) cur = conn.cursor(cursor_factory=RealDictCursor) print("Connected.\n") def get_columns(table): """用 pg_attribute 查列,比 information_schema 快得多""" cur.execute(""" SELECT a.attname AS col, pg_catalog.format_type(a.atttypid, a.atttypmod) AS dtype FROM pg_catalog.pg_attribute a JOIN pg_catalog.pg_class c ON a.attrelid = c.oid JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid WHERE c.relname = %s AND n.nspname = 'public' AND a.attnum > 0 AND NOT a.attisdropped ORDER BY a.attnum """, (table,)) return cur.fetchall() def get_count(table): cur.execute(f"SELECT COUNT(*) as cnt FROM {table}") return cur.fetchone()['cnt'] # ── 查结构 ── tables = ['atomic_capability', 'tool_table', 'requirement_table', 'knowledge'] for t in tables: cols = get_columns(t) # knowledge 表可能很慢,只查结构不查行数 if t == 'knowledge': print(f"\n=== {t} (skipped count) ===") else: cnt = get_count(t) print(f"\n=== {t} ({cnt} rows) ===") for c in cols: print(f" {c['col']:25s} {c['dtype']}") # ── 抽样 ── print("\n\n=== SAMPLE DATA ===") print("\natomic_capability (first 3):") cur.execute("SELECT id, name FROM atomic_capability LIMIT 3") for r in cur.fetchall(): print(f" {r['id']}: {r['name']}") print("\nrequirement_table (first 3):") cur.execute("SELECT id, status FROM requirement_table LIMIT 3") for r in cur.fetchall(): print(f" {r['id']}: {r.get('status', '?')}") print("\ntool_table (first 3):") cur.execute("SELECT id, name FROM tool_table LIMIT 3") for r in cur.fetchall(): print(f" {r['id']}: {r['name']}") cur.close() conn.close() print("\nDone.")