app.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551
  1. import sys
  2. import os
  3. import json
  4. from datetime import datetime, timedelta
  5. import streamlit as st
  6. import pandas as pd
  7. from sqlalchemy import create_engine
  8. from dotenv import load_dotenv
  9. import plotly.express as px
  10. # ===== 项目路径 =====
  11. sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
  12. from app.core.config import settings
  13. # ===== 页面配置 =====
  14. st.set_page_config(
  15. page_title="微信指数趋势看板",
  16. page_icon="📈",
  17. layout="wide",
  18. initial_sidebar_state="expanded",
  19. # menu_items={
  20. # 'Get Help': 'https://www.extremelycoolapp.com/help',
  21. # 'Report a bug': "https://www.extremelycoolapp.com/bug",
  22. # 'About': "# This is a header. This is an *extremely* cool app!"
  23. # }
  24. )
  25. load_dotenv(os.path.join(os.path.dirname(__file__), '../.env'))
  26. # ===== 字段映射 =====
  27. FIELD_MAPPING = {
  28. 'total_score': '总指数',
  29. 'finder_score': '视频号',
  30. 'query_score': '搜一搜',
  31. 'mpdoc_score': '公众号',
  32. 'live_score': '直播',
  33. 'miniapp_score': '小程序'
  34. }
  35. # ======================
  36. # 数据层
  37. # ======================
  38. @st.cache_resource
  39. def get_db_connection():
  40. return create_engine(settings.DATABASE_URL.replace('+aiomysql', '+pymysql'))
  41. @st.cache_data(ttl=3600)
  42. def get_keywords():
  43. engine = get_db_connection()
  44. sql = """
  45. SELECT id, keyword
  46. FROM wx_trend_keywords
  47. WHERE is_active = 1
  48. ORDER BY priority DESC
  49. """
  50. with engine.connect() as conn:
  51. return pd.read_sql(sql, conn)
  52. def add_keyword(keyword):
  53. """
  54. 添加关键词:如果存在则激活,不存在则新增
  55. """
  56. from sqlalchemy import text
  57. engine = get_db_connection()
  58. with engine.begin() as conn:
  59. # 检查是否已存在
  60. check_sql = text("SELECT id, is_active FROM wx_trend_keywords WHERE keyword = :keyword")
  61. result = conn.execute(check_sql, {"keyword": keyword}).fetchone()
  62. if result:
  63. # 已存在,激活
  64. if result[1] == 0:
  65. update_sql = text("UPDATE wx_trend_keywords SET is_active = 1 WHERE id = :id")
  66. conn.execute(update_sql, {"id": result[0]})
  67. return True, f"关键词 '{keyword}' 已重新激活"
  68. else:
  69. return False, f"关键词 '{keyword}' 已存在且处于激活状态"
  70. else:
  71. # 不存在,新增
  72. insert_sql = text("INSERT INTO wx_trend_keywords (keyword, is_active, priority) VALUES (:keyword, 1, 0)")
  73. conn.execute(insert_sql, {"keyword": keyword})
  74. return True, f"关键词 '{keyword}' 已成功添加"
  75. def delete_keyword(keyword_id):
  76. """
  77. 删除关键词:将is_active改为0
  78. """
  79. from sqlalchemy import text
  80. engine = get_db_connection()
  81. with engine.begin() as conn:
  82. sql = text("UPDATE wx_trend_keywords SET is_active = 0 WHERE id = :id")
  83. result = conn.execute(sql, {"id": keyword_id})
  84. return result.rowcount > 0
  85. def fetch_real_time_data(keyword):
  86. """
  87. 实时查询关键词数据
  88. """
  89. import httpx
  90. from datetime import datetime, timedelta
  91. from app.core.config import settings
  92. end_date = datetime.now()
  93. start_date = end_date - timedelta(days=settings.LIMIT_DAY)
  94. payload = {
  95. "keyword": keyword,
  96. "start_ymd": str(start_date.strftime("%Y%m%d")),
  97. "end_ymd": str(end_date.strftime("%Y%m%d"))
  98. }
  99. try:
  100. with httpx.Client(timeout=30.0) as client:
  101. resp = client.post(settings.API_URL, json=payload)
  102. resp.raise_for_status()
  103. data = resp.json()
  104. if data.get('code') != 0:
  105. return False, data.get('msg', 'API请求失败')
  106. raw_list = data.get('data', {}).get('data', [])
  107. raw_list.sort(key=lambda x: x['ymd'])
  108. if not raw_list:
  109. return False, '未获取到数据'
  110. # 转换为DataFrame
  111. df_list = []
  112. for item in raw_list:
  113. row = {
  114. 'keyword': keyword,
  115. 'ymd': item['ymd'],
  116. 'date': pd.to_datetime(item['ymd'], format='%Y%m%d')
  117. }
  118. row.update(item['channel_score'])
  119. df_list.append(row)
  120. return True, pd.DataFrame(df_list)
  121. except Exception as e:
  122. return False, f"查询失败: {str(e)}"
  123. @st.cache_data(ttl=3600)
  124. def get_trend_data(keyword_ids, start_date, end_date):
  125. if not keyword_ids:
  126. return pd.DataFrame()
  127. engine = get_db_connection()
  128. s_date = start_date.strftime("%Y%m%d")
  129. e_date = end_date.strftime("%Y%m%d")
  130. ids_tuple = tuple(keyword_ids) if len(keyword_ids) > 1 else f"({keyword_ids[0]})"
  131. sql = f"""
  132. SELECT
  133. k.keyword,
  134. d.ymd,
  135. d.channel_score
  136. FROM wx_trend_data d
  137. JOIN wx_trend_keywords k ON d.keyword_id = k.id
  138. WHERE d.keyword_id IN {ids_tuple}
  139. AND d.ymd BETWEEN '{s_date}' AND '{e_date}'
  140. ORDER BY d.ymd ASC
  141. """
  142. with engine.connect() as conn:
  143. df = pd.read_sql(sql, conn)
  144. if df.empty:
  145. return df
  146. if isinstance(df['channel_score'].iloc[0], str):
  147. df['channel_score'] = df['channel_score'].apply(json.loads)
  148. score_df = pd.json_normalize(df['channel_score'])
  149. result = pd.concat([df[['keyword', 'ymd']], score_df], axis=1)
  150. result['date'] = pd.to_datetime(result['ymd'], format='%Y%m%d')
  151. return result
  152. # ======================
  153. # Sidebar
  154. # ======================
  155. # 初始化session state
  156. if 'recent_keywords' not in st.session_state:
  157. st.session_state['recent_keywords'] = []
  158. # ======================
  159. # 实时查询
  160. # ======================
  161. with st.sidebar.expander("⚡ 实时查询", expanded=True):
  162. realtime_keyword = st.text_input("输入要实时查询的关键词", placeholder="例如:春节")
  163. if st.button("实时查询"):
  164. if realtime_keyword.strip():
  165. with st.spinner(f"正在查询 '{realtime_keyword}' 的实时数据..."):
  166. success, result = fetch_real_time_data(realtime_keyword.strip())
  167. if success:
  168. # 将实时数据存储到session state
  169. st.session_state['realtime_data'] = result
  170. st.session_state['realtime_keyword'] = realtime_keyword.strip()
  171. # 更新最近使用的关键词
  172. if realtime_keyword.strip() not in st.session_state['recent_keywords']:
  173. st.session_state['recent_keywords'].insert(0, realtime_keyword.strip())
  174. # 只保留最近10个
  175. st.session_state['recent_keywords'] = st.session_state['recent_keywords'][:10]
  176. st.success("实时查询成功")
  177. else:
  178. st.error(f"查询失败: {result}")
  179. else:
  180. st.warning("请输入关键词")
  181. # 最近使用的关键词
  182. if st.session_state['recent_keywords']:
  183. st.markdown("**最近查询**:")
  184. for i, kw in enumerate(st.session_state['recent_keywords'][:5]):
  185. if st.button(kw, key=f"recent_{i}", help="点击使用此关键词"):
  186. # 触发实时查询
  187. success, result = fetch_real_time_data(kw)
  188. if success:
  189. st.session_state['realtime_data'] = result
  190. st.session_state['realtime_keyword'] = kw
  191. st.success(f"已查询关键词: {kw}")
  192. else:
  193. st.error(f"查询失败: {result}")
  194. # ======================
  195. # 筛选条件
  196. # ======================
  197. with st.sidebar.expander("🔍 筛选条件", expanded=True):
  198. kw_df = get_keywords()
  199. keyword_options = kw_df['keyword'].tolist()
  200. # 关键词搜索
  201. search_term = st.text_input("搜索关键词", placeholder="输入关键词搜索")
  202. # 过滤关键词
  203. filtered_keywords = keyword_options
  204. if search_term:
  205. filtered_keywords = [kw for kw in keyword_options if search_term.lower() in kw.lower()]
  206. # 关键词选择:支持多选
  207. selected_keywords = st.multiselect(
  208. "选择关键词(可多选)",
  209. options=filtered_keywords,
  210. default=filtered_keywords[:3] if filtered_keywords else [],
  211. placeholder="请选择关键词"
  212. )
  213. # 自定义关键词输入
  214. custom_keyword = ""
  215. if st.checkbox("输入自定义关键词", value=False):
  216. custom_keyword = st.text_input("自定义关键词", placeholder="例如:春节")
  217. # 确定最终的关键词
  218. final_keywords = selected_keywords.copy()
  219. if custom_keyword:
  220. final_keywords.append(custom_keyword)
  221. # 获取选中关键词的ID(仅适用于列表中的关键词)
  222. selected_ids = []
  223. for keyword in selected_keywords:
  224. if keyword in keyword_options:
  225. selected_ids.extend(kw_df[kw_df['keyword'] == keyword]['id'].tolist())
  226. date_range = st.date_input(
  227. "日期范围",
  228. value=(datetime.now() - timedelta(days=30), datetime.now()),
  229. max_value=datetime.now()
  230. )
  231. selected_metrics = st.multiselect(
  232. "展示维度",
  233. options=list(FIELD_MAPPING.keys()),
  234. format_func=lambda x: FIELD_MAPPING[x],
  235. default=['total_score']
  236. )
  237. # 快捷操作
  238. col1, col2 = st.columns(2)
  239. with col1:
  240. if st.button("清空筛选"):
  241. # 清空筛选条件
  242. st.session_state['realtime_data'] = None
  243. st.session_state['realtime_keyword'] = None
  244. st.success("筛选条件已清空")
  245. with col2:
  246. if st.button("刷新数据"):
  247. # 清除缓存
  248. get_keywords.clear()
  249. st.success("数据已刷新")
  250. # ======================
  251. # 关键词管理
  252. # ======================
  253. with st.sidebar.expander("📝 关键词管理", expanded=False):
  254. # 添加关键词
  255. new_keyword = st.text_input("输入新关键词", placeholder="例如:人工智能")
  256. if st.button("添加关键词"):
  257. if new_keyword.strip():
  258. success, message = add_keyword(new_keyword.strip())
  259. if success:
  260. st.success(message)
  261. # 清除缓存
  262. get_keywords.clear()
  263. # 重新加载关键词列表
  264. kw_df = get_keywords()
  265. else:
  266. st.warning(message)
  267. else:
  268. st.warning("请输入关键词")
  269. # 删除关键词
  270. if not kw_df.empty:
  271. keyword_options = {row['keyword']: row['id'] for _, row in kw_df.iterrows()}
  272. selected_del_keyword = st.selectbox(
  273. "选择要删除的关键词",
  274. options=list(keyword_options.keys()),
  275. index=None,
  276. placeholder="请选择关键词"
  277. )
  278. if st.button("删除关键词"):
  279. if selected_del_keyword:
  280. keyword_id = keyword_options[selected_del_keyword]
  281. success = delete_keyword(keyword_id)
  282. if success:
  283. st.success(f"关键词 '{selected_del_keyword}' 已删除")
  284. # 清除缓存
  285. get_keywords.clear()
  286. # 重新加载关键词列表
  287. kw_df = get_keywords()
  288. else:
  289. st.error("删除失败,请重试")
  290. else:
  291. st.warning("请选择要删除的关键词")
  292. # ======================
  293. # 数据导出
  294. # ======================
  295. # with st.sidebar.expander("💾 数据导出", expanded=False):
  296. # st.markdown("**导出当前数据**:")
  297. # if st.button("导出为CSV"):
  298. # if 'df' in locals():
  299. # import io
  300. # csv = df.to_csv(index=False)
  301. # st.download_button(
  302. # label="下载CSV文件",
  303. # data=csv,
  304. # file_name=f"微信指数_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
  305. # mime="text/csv"
  306. # )
  307. # else:
  308. # st.warning("请先查询数据")
  309. # if st.button("导出为Excel"):
  310. # if 'df' in locals():
  311. # try:
  312. # import openpyxl
  313. # import io
  314. # output = io.BytesIO()
  315. # with pd.ExcelWriter(output, engine='openpyxl') as writer:
  316. # df.to_excel(writer, index=False, sheet_name='微信指数')
  317. # output.seek(0)
  318. # st.download_button(
  319. # label="下载Excel文件",
  320. # data=output,
  321. # file_name=f"微信指数_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx",
  322. # mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
  323. # )
  324. # except ImportError:
  325. # st.warning("请先安装openpyxl库以支持Excel导出")
  326. # else:
  327. # st.warning("请先查询数据")
  328. # ======================
  329. # Main
  330. # =====================
  331. st.title("微信指数趋势")
  332. # 检查是否有实时查询数据
  333. realtime_data = st.session_state.get('realtime_data')
  334. realtime_keyword = st.session_state.get('realtime_keyword')
  335. # 处理查询逻辑
  336. if final_keywords:
  337. # 检查是否有自定义关键词需要实时查询
  338. if custom_keyword:
  339. # 执行实时查询
  340. with st.spinner(f"正在实时查询 '{custom_keyword}' 的数据..."):
  341. success, result = fetch_real_time_data(custom_keyword)
  342. if success:
  343. st.subheader(f"⚡ 实时查询结果:{custom_keyword}")
  344. df = result
  345. else:
  346. st.error(f"查询失败: {result}")
  347. st.stop()
  348. elif realtime_data is not None:
  349. # 显示实时查询结果
  350. st.subheader(f"⚡ 实时查询结果:{realtime_keyword}")
  351. df = realtime_data
  352. else:
  353. # 显示常规查询结果(支持多个关键词)
  354. if not selected_ids:
  355. st.warning("请至少选择一个关键词")
  356. st.stop()
  357. if len(date_range) != 2:
  358. st.warning("请选择完整的日期范围")
  359. st.stop()
  360. with st.spinner("正在加载数据..."):
  361. df = get_trend_data(selected_ids, date_range[0], date_range[1])
  362. if df.empty:
  363. st.warning("所选时间段内暂无数据")
  364. st.stop()
  365. else:
  366. st.warning("请选择或输入关键词")
  367. st.stop()
  368. # ======================
  369. # KPI 卡片
  370. # ======================
  371. st.markdown("### 💡 最新指数概览")
  372. # 获取唯一的关键词列表
  373. unique_keywords = df['keyword'].unique().tolist()
  374. cols = st.columns(min(len(unique_keywords), 4))
  375. for i, kw in enumerate(unique_keywords):
  376. kw_df = df[df['keyword'] == kw].sort_values("date")
  377. if kw_df.empty:
  378. continue
  379. latest = kw_df.iloc[-1]
  380. prev = kw_df.iloc[-2] if len(kw_df) > 1 else None
  381. delta = ""
  382. if prev is not None and prev['total_score'] > 0:
  383. pct = (latest['total_score'] - prev['total_score']) / prev['total_score'] * 100
  384. delta = f"{pct:+.2f}%"
  385. with cols[i % 4]:
  386. st.metric(
  387. label=f"{kw}(总指数)",
  388. value=f"{int(latest['total_score']):,}",
  389. delta=delta
  390. )
  391. st.divider()
  392. # ======================
  393. # 趋势图(专业版)
  394. # ======================
  395. st.markdown("### 📈 趋势对比分析")
  396. compare_metric = st.selectbox(
  397. "对比指标",
  398. options=selected_metrics,
  399. format_func=lambda x: FIELD_MAPPING[x]
  400. )
  401. fig = px.line(
  402. df,
  403. x="date",
  404. y=compare_metric,
  405. color="keyword",
  406. title=f"各关键词【{FIELD_MAPPING[compare_metric]}】趋势对比"
  407. )
  408. # —— 核心专业配置 ——
  409. fig.update_traces(
  410. mode="lines",
  411. line=dict(width=3),
  412. hovertemplate=(
  413. # "日期:%{x|%Y.%m.%d}<br>"
  414. "关键词:%{data.name}<br>"
  415. "指数:%{y:,.0f}"
  416. "<extra></extra>"
  417. )
  418. )
  419. fig.update_layout(
  420. hovermode="x unified",
  421. margin=dict(t=80, l=60, r=40, b=50),
  422. legend=dict(
  423. orientation="h",
  424. yanchor="bottom",
  425. y=1.05,
  426. xanchor="right",
  427. x=1,
  428. title="关键词(点击可隐藏)"
  429. ),
  430. xaxis=dict(
  431. title="日期",
  432. tickformat="%Y.%m.%d",
  433. showgrid=False,
  434. tickfont=dict(size=12)
  435. ),
  436. yaxis=dict(
  437. title="指数",
  438. showgrid=True,
  439. gridcolor="rgba(255,255,255,0.06)",
  440. tickfont=dict(size=12),
  441. tickformat=",d"
  442. ),
  443. hoverlabel=dict(
  444. font_size=12,
  445. font_family="Arial"
  446. )
  447. )
  448. st.plotly_chart(fig, width='stretch')
  449. # st.caption(
  450. # "说明:微信指数为相对热度指标,用于趋势对比分析,不代表绝对搜索量。"
  451. # )
  452. # ======================
  453. # 原始数据
  454. # ======================
  455. with st.expander("📄 查看原始数据"):
  456. display_df = df[['date', 'keyword'] + selected_metrics].copy()
  457. display_df.columns = ['日期', '关键词'] + [FIELD_MAPPING[m] for m in selected_metrics]
  458. display_df['日期'] = display_df['日期'].dt.strftime("%Y-%m-%d")
  459. st.dataframe(
  460. display_df.sort_values(['日期', '关键词'], ascending=[False, True]),
  461. width='stretch',
  462. hide_index=True
  463. )