Преглед на файлове

Merge branch 'feature/luojunhui/20260121-crawl-cooperate-accounts' of Server/LongArticleTaskServer into master

luojunhui преди 1 месец
родител
ревизия
65020867db
Променен е 1 файл, в който са добавени 31 реда и са изтрити 14 реда
  1. 31 14
      applications/tasks/monitor_tasks/cooperate_accounts_monitor.py

+ 31 - 14
applications/tasks/monitor_tasks/cooperate_accounts_monitor.py

@@ -7,7 +7,7 @@ from tqdm import tqdm
 from datetime import datetime, timedelta
 from urllib.parse import unquote, parse_qs, urlparse
 
-from applications.utils import fetch_from_odps
+from applications.utils import fetch_from_odps, show_desc_to_sta
 from applications.crawler.wechat import get_article_list_from_account
 from applications.crawler.wechat import get_article_detail
 
@@ -24,12 +24,12 @@ class CooperateAccountsMonitorTaskConst:
     HAS_MINI_PROGRAM = 1
     DONT_HAS_MINI_PROGRAM = 0
 
-    ARTICLE_NUM = 200
+    ARTICLE_NUM = 100
 
 
 class CooperateAccountsMonitorTaskUtils(CooperateAccountsMonitorTaskConst):
     @staticmethod
-    def get_monitor_account_list():
+    def get_uv_account_list():
         # dt = (datetime.today() - timedelta(days=1)).strftime("%Y%m%d")
         week_ago = (datetime.today() - timedelta(days=7)).strftime("%Y-%m-%d %H:%M:%S")
         query = f"""
@@ -272,6 +272,7 @@ class CooperateAccountsMonitorTask(CooperateAccountsMonitorMapper):
             base_info = group_article["AppMsg"]["BaseInfo"]
             detail_info = group_article["AppMsg"]["DetailInfo"]
             for single_article in detail_info:
+                show_stat = show_desc_to_sta(single_article.get("ShowDesc", None))
                 single_param = (
                     gh_id,
                     account_name,
@@ -284,13 +285,15 @@ class CooperateAccountsMonitorTask(CooperateAccountsMonitorMapper):
                     single_article["Digest"],
                     single_article["send_time"],
                     self.extract_wx_sn(single_article["ContentUrl"]),
+                    show_stat.get("show_view_count", 0),
+                    show_stat.get("show_like_count", 0)
                 )
                 params.append(single_param)
 
         query = """
             INSERT IGNORE INTO cooperate_accounts_daily_detail
-                (gh_id, account_name, app_msg_id, publish_type, position, article_title, article_link, article_cover, article_desc, publish_timestamp, wx_sn)
-            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
+                (gh_id, account_name, app_msg_id, publish_type, position, article_title, article_link, article_cover, article_desc, publish_timestamp, wx_sn, read_cnt, like_cnt)
+            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
         """
         await self.pool.async_save(query=query, params=params, batch=True)
 
@@ -338,13 +341,13 @@ class CooperateAccountsMonitorTask(CooperateAccountsMonitorMapper):
                 pass
 
     # 获取待处理的文章
-    async def get_article_list(self):
+    async def get_article_list(self, account_name_tuple):
         query = """
             SELECT wx_sn, article_link FROM cooperate_accounts_daily_detail 
-            WHERE fetch_status = %s ORDER BY position LIMIT %s;
+            WHERE fetch_status = %s AND account_name IN %s ORDER BY position LIMIT %s;
         """
         return await self.pool.async_fetch(
-            query=query, params=(self.INIT_STATUS, self.ARTICLE_NUM)
+            query=query, params=(self.INIT_STATUS, account_name_tuple, self.ARTICLE_NUM)
         )
 
     # 入口函数
@@ -364,10 +367,24 @@ class CooperateAccountsMonitorTask(CooperateAccountsMonitorMapper):
 
 
             case "get_detail":
-                article_list = await self.get_article_list()
-                for article in tqdm(article_list, desc="处理文章详情"):
-                    try:
-                        await self.set_article_detail(article)
+                has_uv_accounts = self.get_uv_account_list()
+                has_uv_name_list = []
+                for i in has_uv_accounts:
+                    account_name = i.公众号名
+                    if account_name:
+                        has_uv_name_list.append(account_name)
+
+                if has_uv_name_list:
+                    account_name_tuple = tuple(has_uv_name_list)
+                    article_list = await self.get_article_list(account_name_tuple)
+                    for article in tqdm(article_list, desc="处理文章详情"):
+                        try:
+                            await self.set_article_detail(article)
+
+                        except Exception as e:
+                            print(f"获取文章详情失败-{article['article_link']}-{e}")
+
+                else:
+                    print("没有需要处理详情的账号")
+                    return
 
-                    except Exception as e:
-                        print(f"获取文章详情失败-{article['article_link']}-{e}")