فهرست منبع

Merge branch '2025-02-27-update-article-association' of luojunhui/LongArticlesJob into master

luojunhui 7 ماه پیش
والد
کامیت
1a7d8e6399
3 فایلهای تغییر یافته به همراه 55 افزوده شده و 3 حذف شده
  1. 1 1
      applications/const/__init__.py
  2. 28 2
      article_association_task.py
  3. 26 0
      sh/run_article_association.sh

+ 1 - 1
applications/const/__init__.py

@@ -48,7 +48,7 @@ class ColdStartTaskConst:
     BULK_PUBLISH_TYPE = 9
 
     # 种子文章数量
-    SEED_ARTICLE_LIMIT_NUM = 30
+    SEED_ARTICLE_LIMIT_NUM = 60
 
 
 class updatePublishedMsgTaskConst:

+ 28 - 2
article_association_task.py

@@ -1,9 +1,12 @@
 """
 @author: luojunhui
 """
+import traceback
 from argparse import ArgumentParser
 
+from applications import bot
 from coldStartTasks.crawler.wechat import ArticleAssociationCrawler
+from coldStartTasks.publish.publish_article_association_articles import ArticleAssociationPublish
 
 
 def main():
@@ -18,9 +21,32 @@ def main():
         biz_date = args.biz_date
     else:
         biz_date = None
+    try:
+        article_association_crawler = ArticleAssociationCrawler()
+        article_association_crawler.deal(biz_date=biz_date)
+    except Exception as e:
+        bot(
+            title="It occurred an Exception in ArticleAssociationCrawler",
+            detail={
+                "Error": str(e),
+                "Traceback": traceback.format_exc()
+            },
+            mention=False,
+        )
 
-    article_association_crawler = ArticleAssociationCrawler()
-    article_association_crawler.deal(biz_date=biz_date)
+    # publish
+    try:
+        article_association_publish = ArticleAssociationPublish()
+        article_association_publish.deal()
+    except Exception as e:
+        bot(
+            title="It occurred an Exception in ArticleAssociationPublish",
+            detail={
+                "Error": str(e),
+                "Traceback": traceback.format_exc()
+            },
+            mention=False,
+        )
 
 
 if __name__ == "__main__":

+ 26 - 0
sh/run_article_association.sh

@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# 获取当前日期,格式为 YYYY-MM-DD
+CURRENT_DATE=$(date +%F)
+
+# 日志文件路径,包含日期
+LOG_FILE="/root/luojunhui/logs/article_association_crawler_log_$CURRENT_DATE.txt"
+
+# 重定向整个脚本的输出到带日期的日志文件
+exec >> "$LOG_FILE" 2>&1
+if pgrep -f "python3 article_association_task.py" > /dev/null
+then
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - article_association_task.py is running"
+else
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - trying to restart article_association_task.py"
+    # 切换到指定目录
+    cd /root/luojunhui/LongArticlesJob
+
+    # 激活 Conda 环境
+    source /root/miniconda3/etc/profile.d/conda.sh
+    conda activate tasks
+
+    # 在后台运行 Python 脚本并重定向日志输出
+    nohup python3 article_association_task.py >> "${LOG_FILE}" 2>&1 &
+    echo "$(date '+%Y-%m-%d %H:%M:%S') - successfully restarted article_association_task.py"
+fi