zhangliang 1 miesiąc temu
rodzic
commit
933f20182f

+ 1 - 1
app/main.py

@@ -24,7 +24,7 @@ async def start_scheduler():
     scheduler.add_job(
         scheduled_task,
         'cron',
-        # hour=settings.CRON_HOUR, # 不设置 hour 参数,代表每小时都跑
+        hour='9-23', # 不设置 hour 参数,代表每小时都跑
         minute=settings.CRON_MINUTE
     )
 

+ 4 - 10
app/repository/index_repo.py

@@ -64,19 +64,13 @@ class IndexRepository:
         # """)
         sql = text("""
                    INSERT INTO wx_trend_data (keyword_id, ymd, channel_score, update_time)
-                   VALUES (:keyword_id, :ymd, :score_json, NOW()) ON DUPLICATE KEY
-                   UPDATE
-                       channel_score = CASE
-                       WHEN channel_score !=
-                   VALUES (channel_score) THEN
-                   VALUES (channel_score)
-                       ELSE channel_score
-                   END
-                   ,
+                    VALUES (:keyword_id, :ymd, :score_json, NOW()) 
+                    ON DUPLICATE KEY UPDATE
                     update_time = CASE 
                         WHEN channel_score != VALUES(channel_score) THEN NOW()
                         ELSE update_time
-                   END
+                    END,
+                    channel_score = VALUES(channel_score)
                    """)
 
         await self.session.execute(sql, params)

+ 1 - 0
app/services/collector.py

@@ -30,6 +30,7 @@ class CollectorService:
             "start_ymd": str(start_date.strftime("%Y%m%d")),
             "end_ymd": str(end_date.strftime("%Y%m%d"))
         }
+        print(payload)
         logger.info(f"采集数据: {keyword}")
 
         try:

+ 0 - 0
dashboard/__init__.py


+ 272 - 0
dashboard/app.py

@@ -0,0 +1,272 @@
+import sys
+import os
+import json
+from datetime import datetime, timedelta
+
+import streamlit as st
+import pandas as pd
+from sqlalchemy import create_engine
+from dotenv import load_dotenv
+import plotly.express as px
+
+# ===== 项目路径 =====
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+from app.core.config import settings
+
+# ===== 页面配置 =====
+st.set_page_config(
+    page_title="微信指数趋势看板",
+    page_icon="📈",
+    layout="wide",
+    initial_sidebar_state="expanded",
+# menu_items={
+#         'Get Help': 'https://www.extremelycoolapp.com/help',
+#         'Report a bug': "https://www.extremelycoolapp.com/bug",
+#         'About': "# This is a header. This is an *extremely* cool app!"
+#     }
+)
+
+load_dotenv(os.path.join(os.path.dirname(__file__), '../.env'))
+
+# ===== 字段映射 =====
+FIELD_MAPPING = {
+    'total_score': '总指数',
+    'finder_score': '视频号',
+    'query_score': '搜一搜',
+    'mpdoc_score': '公众号',
+    'live_score': '直播',
+    'miniapp_score': '小程序'
+}
+
+# ======================
+# 数据层
+# ======================
+
+@st.cache_resource
+def get_db_connection():
+    return create_engine(settings.DATABASE_URL.replace('+aiomysql', '+pymysql'))
+
+
+@st.cache_data(ttl=3600)
+def get_keywords():
+    engine = get_db_connection()
+    sql = """
+        SELECT id, keyword
+        FROM wx_trend_keywords
+        WHERE is_active = 1
+        ORDER BY priority DESC
+    """
+    with engine.connect() as conn:
+        return pd.read_sql(sql, conn)
+
+
+@st.cache_data(ttl=3600)
+def get_trend_data(keyword_ids, start_date, end_date):
+    if not keyword_ids:
+        return pd.DataFrame()
+
+    engine = get_db_connection()
+    s_date = start_date.strftime("%Y%m%d")
+    e_date = end_date.strftime("%Y%m%d")
+
+    ids_tuple = tuple(keyword_ids) if len(keyword_ids) > 1 else f"({keyword_ids[0]})"
+
+    sql = f"""
+        SELECT
+            k.keyword,
+            d.ymd,
+            d.channel_score
+        FROM wx_trend_data d
+        JOIN wx_trend_keywords k ON d.keyword_id = k.id
+        WHERE d.keyword_id IN {ids_tuple}
+          AND d.ymd BETWEEN '{s_date}' AND '{e_date}'
+        ORDER BY d.ymd ASC
+    """
+
+    with engine.connect() as conn:
+        df = pd.read_sql(sql, conn)
+
+    if df.empty:
+        return df
+
+    if isinstance(df['channel_score'].iloc[0], str):
+        df['channel_score'] = df['channel_score'].apply(json.loads)
+
+    score_df = pd.json_normalize(df['channel_score'])
+    result = pd.concat([df[['keyword', 'ymd']], score_df], axis=1)
+    result['date'] = pd.to_datetime(result['ymd'], format='%Y%m%d')
+    return result
+
+
+# ======================
+# Sidebar
+# ======================
+
+st.sidebar.header("🔍 筛选条件")
+
+kw_df = get_keywords()
+selected_keywords = st.sidebar.multiselect(
+    "关键词",
+    options=kw_df['keyword'].tolist(),
+    default=kw_df['keyword'].iloc[:3].tolist() if not kw_df.empty else []
+)
+
+selected_ids = kw_df[kw_df['keyword'].isin(selected_keywords)]['id'].tolist()
+
+date_range = st.sidebar.date_input(
+    "日期范围",
+    value=(datetime.now() - timedelta(days=30), datetime.now()),
+    max_value=datetime.now()
+)
+
+selected_metrics = st.sidebar.multiselect(
+    "展示维度",
+    options=list(FIELD_MAPPING.keys()),
+    format_func=lambda x: FIELD_MAPPING[x],
+    default=['total_score']
+)
+
+# ======================
+# Main
+# ======================
+
+st.title("微信指数趋势洞察")
+
+if not selected_ids:
+    st.warning("请至少选择一个关键词")
+    st.stop()
+
+if len(date_range) != 2:
+    st.warning("请选择完整的日期范围")
+    st.stop()
+
+with st.spinner("正在加载数据..."):
+    df = get_trend_data(selected_ids, date_range[0], date_range[1])
+
+if df.empty:
+    st.warning("所选时间段内暂无数据")
+    st.stop()
+
+# ======================
+# KPI 卡片
+# ======================
+
+st.markdown("### 💡 最新指数概览")
+
+cols = st.columns(min(len(selected_keywords), 4))
+
+for i, kw in enumerate(selected_keywords):
+    kw_df = df[df['keyword'] == kw].sort_values("date")
+    if kw_df.empty:
+        continue
+
+    latest = kw_df.iloc[-1]
+    prev = kw_df.iloc[-2] if len(kw_df) > 1 else None
+
+    delta = ""
+    if prev is not None and prev['total_score'] > 0:
+        pct = (latest['total_score'] - prev['total_score']) / prev['total_score'] * 100
+        delta = f"{pct:+.2f}%"
+
+    with cols[i % 4]:
+        st.metric(
+            label=f"{kw}(总指数)",
+            value=f"{int(latest['total_score']):,}",
+            delta=delta
+        )
+
+st.divider()
+
+# ======================
+# 趋势图(专业版)
+# ======================
+
+st.markdown("### 📈 趋势对比分析")
+
+compare_metric = st.selectbox(
+    "对比指标",
+    options=selected_metrics,
+    format_func=lambda x: FIELD_MAPPING[x]
+)
+
+fig = px.line(
+    df,
+    x="date",
+    y=compare_metric,
+    color="keyword",
+    title=f"各关键词【{FIELD_MAPPING[compare_metric]}】趋势对比"
+)
+
+# —— 核心专业配置 ——
+fig.update_traces(
+    mode="lines",
+    line=dict(width=3),
+    hovertemplate=(
+        "日期:%{x|%Y.%m.%d}<br>"
+        "关键词:%{text}<br>"
+        "指数:%{y:,.0f}"
+        "<extra></extra>"
+    ),
+    text=df['keyword']  # 使用 text 参数传递关键词
+)
+
+fig.update_layout(
+    hovermode="x unified",
+    margin=dict(t=80, l=60, r=40, b=50),
+
+    legend=dict(
+        orientation="h",
+        yanchor="bottom",
+        y=1.05,
+        xanchor="right",
+        x=1,
+        title="关键词(点击可隐藏)"
+    ),
+
+    xaxis=dict(
+        title="日期",
+        tickformat="%Y.%m.%d",
+        showgrid=False,
+        tickfont=dict(size=12)
+    ),
+
+    yaxis=dict(
+        title="指数",
+        showgrid=True,
+        gridcolor="rgba(255,255,255,0.06)",
+        tickfont=dict(size=12),
+        tickformat=",d"
+    )
+)
+fig.update_traces(
+    mode="lines",
+    line=dict(width=3),
+    hovertemplate=(
+        # "日期:%{x|%Y.%m.%d}<br>"
+        "关键词:%{text}<br>"
+        "指数:%{y:,.0f}"
+        "<extra></extra>"
+    ),
+    text=df['keyword']
+)
+
+st.plotly_chart(fig, width='stretch')
+
+# st.caption(
+#     "说明:微信指数为相对热度指标,用于趋势对比分析,不代表绝对搜索量。"
+# )
+
+# ======================
+# 原始数据
+# ======================
+
+with st.expander("📄 查看原始数据"):
+    display_df = df[['date', 'keyword'] + selected_metrics].copy()
+    display_df.columns = ['日期', '关键词'] + [FIELD_MAPPING[m] for m in selected_metrics]
+    display_df['日期'] = display_df['日期'].dt.strftime("%Y-%m-%d")
+
+    st.dataframe(
+        display_df.sort_values(['日期', '关键词'], ascending=[False, True]),
+        width='stretch',
+        hide_index=True
+    )

+ 4 - 2
deploy-jenkins.sh

@@ -14,14 +14,16 @@ echo "停止旧容器..."
 docker stop weindex || true
 docker rm weindex || true
 
-# 启动新容器
+# 启动新容器(添加端口映射和Streamlit配置)
 echo "启动新容器..."
 docker run -d \
   --name weindex \
   --restart unless-stopped \
+  -p 8501:8501 \
   -v $(pwd)/logs:/app/logs \
   --env-file .env \
-  weindex:latest
+  weindex:latest \
+  streamlit run dashboard/app.py --server.address=0.0.0.0 --server.port=8501
 
 echo "部署完成!"