Преглед изворног кода

Merge branch 'feature/luojunhui/20260127-crawl-article-detail' of Server/LongArticleTaskServer into master

luojunhui пре 1 месеца
родитељ
комит
898a17600f

+ 47 - 15
README.md

@@ -22,6 +22,7 @@ docker compose up -d
 │   ├── __init__.py
 │   ├── ab_test
 │   │   ├── __init__.py
+│   │   ├── ab_accounts.py
 │   │   └── get_cover.py
 │   ├── api
 │   │   ├── __init__.py
@@ -35,11 +36,15 @@ docker compose up -d
 │   ├── config
 │   │   ├── __init__.py
 │   │   ├── aliyun_log_config.py
+│   │   ├── category_config.py
+│   │   ├── cold_start_config.py
 │   │   ├── deepseek_config.py
 │   │   ├── elastic_search_mappings.py
 │   │   ├── es_certs.crt
-│   │   └── mysql_config.py
+│   │   ├── mysql_config.py
+│   │   └── task_chinese_name.py
 │   ├── crawler
+│   │   ├── tophub
 │   │   ├── toutiao
 │   │   │   ├── __init__.py
 │   │   │   ├── blogger.py
@@ -50,6 +55,8 @@ docker compose up -d
 │   │   │   └── use_js.py
 │   │   └── wechat
 │   │       ├── __init__.py
+│   │       ├── gzh_article_stat.py
+│   │       ├── gzh_fans.py
 │   │       └── gzh_spider.py
 │   ├── database
 │   │   ├── __init__.py
@@ -60,51 +67,76 @@ docker compose up -d
 │   │   └── data_recycle_pipeline.py
 │   ├── service
 │   │   ├── __init__.py
-│   │   └── log_service.py
+│   │   ├── gzh_cookie_manager.py
+│   │   ├── log_service.py
+│   │   └── task_manager_service.py
 │   ├── tasks
 │   │   ├── __init__.py
+│   │   ├── algorithm_tasks
+│   │   │   ├── __init__.py
+│   │   │   ├── account_category_analysis.py
+│   │   │   └── models.py
+│   │   ├── analysis_task
+│   │   │   ├── __init__.py
+│   │   │   ├── account_position_info.py
+│   │   │   └── crawler_detail.py
 │   │   ├── cold_start_tasks
 │   │   │   ├── __init__.py
-│   │   │   └── article_pool_cold_start.py
+│   │   │   ├── article_pool
+│   │   │   │   ├── __init__.py
+│   │   │   │   ├── article_pool_cold_start_const.py
+│   │   │   │   ├── article_pool_cold_start_strategy.py
+│   │   │   │   └── article_pool_filter_strategy.py
+│   │   │   ├── article_pool_cold_start.py
+│   │   │   ├── video_pool
+│   │   │   │   ├── __init__.py
+│   │   │   │   ├── video_pool_audit_strategy.py
+│   │   │   │   └── video_pool_const.py
+│   │   │   └── video_pool_cold_start.py
 │   │   ├── crawler_tasks
 │   │   │   ├── __init__.py
+│   │   │   ├── crawler_account_manager.py
+│   │   │   ├── crawler_gzh.py
+│   │   │   ├── crawler_gzh_fans.py
 │   │   │   └── crawler_toutiao.py
 │   │   ├── data_recycle_tasks
 │   │   │   ├── __init__.py
-│   │   │   └── recycle_daily_publish_articles.py
+│   │   │   ├── article_detail_stat.py
+│   │   │   ├── recycle_daily_publish_articles.py
+│   │   │   ├── recycle_mini_program_detail.py
+│   │   │   └── recycle_outside_account_articles.py
 │   │   ├── llm_tasks
 │   │   │   ├── __init__.py
 │   │   │   ├── candidate_account_process.py
-│   │   │   └── process_title.py
+│   │   │   ├── process_title.py
+│   │   │   └── prompts.py
 │   │   ├── monitor_tasks
 │   │   │   ├── __init__.py
+│   │   │   ├── auto_reply_cards_monitor.py
+│   │   │   ├── cooperate_accounts_monitor.py
 │   │   │   ├── get_off_videos.py
 │   │   │   ├── gzh_article_monitor.py
 │   │   │   ├── kimi_balance.py
+│   │   │   ├── limited_account_analysis.py
 │   │   │   └── task_processing_monitor.py
+│   │   ├── task_config.py
+│   │   ├── task_handler.py
 │   │   ├── task_mapper.py
 │   │   ├── task_scheduler.py
-│   │   └── task_scheduler_v2.py
+│   │   └── task_utils.py
 │   └── utils
 │       ├── __init__.py
 │       ├── aigc_system_database.py
 │       ├── async_apollo_client.py
 │       ├── async_http_client.py
 │       ├── async_mysql_utils.py
+│       ├── async_tasks.py
 │       ├── common.py
 │       ├── get_cover.py
 │       ├── item.py
 │       └── response.py
-├── dev
-│   ├── code.py
-│   ├── dev.py
-│   ├── run_task_dev.py
-│   ├── sample.txt
-│   ├── title.json
-│   └── totp.py
-├── dev.py
 ├── docker-compose.yaml
-├── myapp.log
+├── jenkins_bash.sh
 ├── requirements.txt
 ├── routes
 │   ├── __init__.py

+ 28 - 6
applications/tasks/data_recycle_tasks/article_detail_stat.py

@@ -1,5 +1,6 @@
 import json
 import time
+import traceback
 from datetime import datetime, timedelta
 
 from applications.api import feishu_robot
@@ -33,9 +34,9 @@ class ArticleDetailStatConst:
 
 
 class ArticleDetailStatMapper(ArticleDetailStatConst):
-    def __init__(self, pool, log_client):
+    def __init__(self, pool, log_service):
         self.pool = pool
-        self.log_client = log_client
+        self.log_service = log_service
 
     # 获取账号信息
     async def fetch_monitor_accounts(self):
@@ -135,8 +136,8 @@ class ArticleDetailStatMapper(ArticleDetailStatConst):
 
 
 class ArticleDetailStat(ArticleDetailStatMapper):
-    def __init__(self, pool, log_client):
-        super().__init__(pool, log_client)
+    def __init__(self, pool, log_service):
+        super().__init__(pool, log_service)
 
     # 存储账号信息
     async def save_account_details(self, account, fetch_response):
@@ -282,7 +283,7 @@ class ArticleDetailStat(ArticleDetailStatMapper):
         # yesterday_string = datetime.strftime(datetime.now() - timedelta(days=5), "%Y-%m-%d")
         dt_list = [
             (datetime.now() - timedelta(days=i)).strftime("%Y-%m-%d")
-            for i in range(1, 31)
+            for i in range(1, 2)
         ]
         for dt in dt_list:
             print(f"{account['account_name']} crawl {dt} read_data")
@@ -304,5 +305,26 @@ class ArticleDetailStat(ArticleDetailStatMapper):
     # 入口函数
     async def deal(self):
         accounts = await self.fetch_monitor_accounts()
+        if not accounts:
+            return
+
         for account in accounts:
-            await self.process_single_account(account)
+            try:
+                await self.process_single_account(account)
+                await self.log_service.log(
+                    contents={
+                        "task": "article_detail_stat",
+                        "account_name": account["account_name"],
+                        "status": "success"
+                    }
+                )
+            except Exception as e:
+                await self.log_service.log(
+                    contents={
+                        "task": "article_detail_stat",
+                        "account_name": account["account_name"],
+                        "error": str(e),
+                        "traceback": traceback.format_exc(),
+                        "status": "fail"
+                    }
+                )

+ 8 - 0
applications/tasks/task_handler.py

@@ -15,6 +15,7 @@ from applications.tasks.crawler_tasks import WeixinAccountManager
 from applications.tasks.crawler_tasks import CrawlerGzhAccountArticles
 from applications.tasks.crawler_tasks import CrawlerGzhSearchArticles
 
+from applications.tasks.data_recycle_tasks import ArticleDetailStat
 from applications.tasks.data_recycle_tasks import RecycleDailyPublishArticlesTask
 from applications.tasks.data_recycle_tasks import RecycleOutsideAccountArticlesTask
 from applications.tasks.data_recycle_tasks import CheckDailyPublishArticlesTask
@@ -219,6 +220,13 @@ class TaskHandler:
         return TaskStatus.SUCCESS
 
     # ==================== 数据回收类任务 ====================
+    @register("article_detail_stat")
+    async def _article_detail_stat_handler(self) -> int:
+        """文章详情统计"""
+        task = ArticleDetailStat(self.db_client, self.log_client)
+        await task.deal()
+        return TaskStatus.SUCCESS
+
 
     @register("daily_publish_articles_recycle")
     async def _recycle_article_data_handler(self) -> int:

+ 3 - 4
jenkins_bash.sh

@@ -1,5 +1,4 @@
-REMOTE_HOST="192.168.205.169"
-REMOTE_USER="root"#!/bin/bash
+#!/bin/bash
 set -e
 
 echo "===== 1. 准备构建目录 ====="
@@ -33,9 +32,9 @@ echo "===== 5. 推送 Docker 镜像 ====="
 docker push ${FULL_IMAGE}
 
 echo "===== 6. 远程服务器部署 ====="
-REMOTE_HOST="192.168.205.169"
+REMOTE_HOST="192.168.142.66"
 REMOTE_USER="root"
-REMOTE_PASS="Qingqu@2024"
+REMOTE_PASS="piaoquan@2025"
 
 sshpass -p "${REMOTE_PASS}" ssh -o StrictHostKeyChecking=no \
   ${REMOTE_USER}@${REMOTE_HOST} \