1 miesiąc temu · 89b6dddc20
--- a/account_cold_start_daily.py
+++ b/account_cold_start_daily.py
@@ -8,29 +8,12 @@ from argparse import ArgumentParser
 
															 from applications import longArticlesMySQL, bot
														
 
															 from coldStartTasks.crawler.weixinCategoryCrawler import weixinCategory
														
 
															-from coldStartTasks.publish.publish_single_video_pool_videos import PublishSingleVideoPoolVideos
														
 
															 from coldStartTasks.publish.publishCategoryArticles import CategoryColdStartTask
														
 
															 from coldStartTasks.filter.title_similarity_task import ColdStartTitleSimilarityTask
														
 
															 DEFAULT_CATEGORY_LIST = ['1030-手动挑号', 'account_association']
														
 
															-def publish_single_video_task():
														
 
															-    """
														
 
															-    从视频内容池获取抓取
														
 
															-    """
														
 
															-    try:
														
 
															-        publish_single_video_pool_videos = PublishSingleVideoPoolVideos()
														
 
															-        publish_single_video_pool_videos.deal()
														
 
															-    except Exception as e:
														
 
															-        bot(
														
 
															-            title="视频内容池任务创建失败",
														
 
															-            detail={
														
 
															-                "error": str(e),
														
 
															-                "error_msg": traceback.format_exc()
														
 
															-            }
														
 
															-        )
														
 
															-
														
 
															 class AccountColdStartDailyTask(object):
														
 
															     """
														
@@ -127,21 +110,17 @@ def main(date_str, category_list=None, article_source=None):
 
															     main job, use crontab to do job daily
														
 
															     :return:
														
 
															     """
														
 
															-    # 首先发布视频内容池
														
 
															-    publish_single_video_task()
														
 
															-
														
 
															-    # 再处理文章内容池
														
 
															     if not category_list:
														
 
															         category_list = DEFAULT_CATEGORY_LIST
														
 
															     if not article_source:
														
 
															         article_source = 'weixin'
														
 
															     task = AccountColdStartDailyTask()
														
 
															     if task.init_db():
														
 
															-        # if article_source == 'weixin':
														
 
															-        #     task.crawler_task(category_list=category_list, date_str=date_str)
														
 
															-
														
 
															         task.publish_article_task(category_list=category_list, article_source=article_source)
														
 
															+        if article_source == 'weixin':
														
 
															+            task.crawler_task(category_list=category_list, date_str=date_str)
														
 
															+
														
 
															 if __name__ == '__main__':
														
 
															     parser = ArgumentParser()
														
@@ -153,9 +132,6 @@ if __name__ == '__main__':
 
															     else:
														
 
															         run_date = datetime.date.today().isoformat()
														
 
															-    # 执行微信抓取发布
														
 
															-    main(date_str=run_date)
														
 
															-
														
 
															     # 执行头条发布
														
 
															     main(
														
 
															         date_str=run_date,
														
@@ -163,4 +139,7 @@ if __name__ == '__main__':
 
															         article_source='toutiao'
														
 
															     )
														
 
															+    # 执行微信抓取发布
														
 
															+    main(date_str=run_date)
														
 
															+
														
--- a/coldStartTasks/publish/publish_article_pool_articles.py
+++ b/coldStartTasks/publish/publish_article_pool_articles.py
@@ -0,0 +1,37 @@
 
															+import datetime
														
 
															+import json
														
 
															+import time
														
 
															+import traceback
														
 
															+
														
 
															+from pandas import DataFrame
														
 
															+
														
 
															+from applications import aiditApi, log, bot
														
 
															+from applications.db import DatabaseConnector
														
 
															+from config import long_articles_config
														
 
															+
														
 
															+
														
 
															+class CategoryColdStartTask:
														
 
															+    def __init__(self):
														
 
															+        self.db_client = DatabaseConnector(long_articles_config)
														
 
															+        self.db_client.connect()
														
 
															+
														
 
															+    def insert_crawler_plan(self, crawler_plan_id,crawler_plan_name, create_timestamp):
														
 
															+        insert_query = f"""
														
 
															+            insert into article_crawler_plan (crawler_plan_id, name, create_timestamp) values (%s, %s, %s);
														
 
															+        """
														
 
															+        try:
														
 
															+            self.db_client.save(
														
 
															+                query=insert_query,
														
 
															+                params=(crawler_plan_id, crawler_plan_name, create_timestamp)
														
 
															+            )
														
 
															+        except Exception as e:
														
 
															+            bot(
														
 
															+                title="品类冷启任务，记录抓取计划id失败",
														
 
															+                detail={
														
 
															+                    "error": str(e),
														
 
															+                    "error_msg": traceback.format_exc(),
														
 
															+                    "crawler_plan_id": crawler_plan_id,
														
 
															+                    "crawler_plan_name": crawler_plan_name
														
 
															+                }
														
 
															+            )
														
 
															+
														
--- a/cold_start_publish_to_aigc.py
+++ b/cold_start_publish_to_aigc.py
@@ -0,0 +1,6 @@
 
															+from tasks.publish_tasks.cold_start_publish_daily import ColdStartPublishDailyTask
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    cold_start_publish_daily_task = ColdStartPublishDailyTask()
														
 
															+    cold_start_publish_daily_task.publish_articles_from_video_pool()
														
--- a/crawler_sohu_videos_task.py
+++ b/crawler_sohu_videos_task.py
@@ -1,14 +0,0 @@
 
															-from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import CrawlerSohuHotVideos
														
 
															-from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import CrawlerSohuRecommendVideos
														
 
															-
														
 
															-def main():
														
 
															-    # step1, crawl sohu hot videos
														
 
															-    crawler_sohu_hot_videos = CrawlerSohuHotVideos()
														
 
															-    crawler_sohu_hot_videos.deal()
														
 
															-
														
 
															-    # step2, crawl sohu recommend videos
														
 
															-    crawler_sohu_recommend_videos = CrawlerSohuRecommendVideos()
														
 
															-    crawler_sohu_recommend_videos.deal()
														
 
															-
														
 
															-if __name__ == '__main__':
														
 
															-    main()
														
--- a/not_used_tasks/article_association_task.py
+++ b/not_used_tasks/article_association_task.py
--- a/sh/run_cold_start_publish.sh
+++ b/sh/run_cold_start_publish.sh
@@ -0,0 +1,26 @@
 
															+#!/bin/bash
														
 
															+
														
 
															+# 获取当前日期，格式为 YYYY-MM-DD
														
 
															+CURRENT_DATE=$(date +%F)
														
 
															+
														
 
															+# 日志文件路径，包含日期
														
 
															+LOG_FILE="/root/luojunhui/logs/cold_start_publish_log_$CURRENT_DATE.txt"
														
 
															+
														
 
															+# 重定向整个脚本的输出到带日期的日志文件
														
 
															+exec >> "$LOG_FILE" 2>&1
														
 
															+if pgrep -f "python3 cold_start_publish_to_aigc.py" > /dev/null
														
 
															+then
														
 
															+    echo "$(date '+%Y-%m-%d %H:%M:%S') - cold_start_publish_to_aigc.py is running"
														
 
															+else
														
 
															+    echo "$(date '+%Y-%m-%d %H:%M:%S') - trying to restart cold_start_publish_to_aigc.py"
														
 
															+    # 切换到指定目录
														
 
															+    cd /root/luojunhui/LongArticlesJob
														
 
															+
														
 
															+    # 激活 Conda 环境
														
 
															+    source /root/miniconda3/etc/profile.d/conda.sh
														
 
															+    conda activate tasks
														
 
															+
														
 
															+    # 在后台运行 Python 脚本并重定向日志输出
														
 
															+    nohup python3 cold_start_publish_to_aigc.py >> "${LOG_FILE}" 2>&1 &
														
 
															+    echo "$(date '+%Y-%m-%d %H:%M:%S') - successfully restarted cold_start_publish_to_aigc.py"
														
 
															+fi
														
--- a/sh/run_article_association.sh
+++ b/sh/run_article_association.sh
@@ -4,15 +4,15 @@
 
															 CURRENT_DATE=$(date +%F)
														
 
															 # 日志文件路径，包含日期
														
 
															-LOG_FILE="/root/luojunhui/logs/article_association_crawler_log_$CURRENT_DATE.txt"
														
 
															+LOG_FILE="/root/luojunhui/logs/schedule_app_log_$CURRENT_DATE.txt"
														
 
															 # 重定向整个脚本的输出到带日期的日志文件
														
 
															 exec >> "$LOG_FILE" 2>&1
														
 
															-if pgrep -f "python3 article_association_task.py" > /dev/null
														
 
															+if pgrep -f "python3 schedule_app.py" > /dev/null
														
 
															 then
														
 
															-    echo "$(date '+%Y-%m-%d %H:%M:%S') - article_association_task.py is running"
														
 
															+    echo "$(date '+%Y-%m-%d %H:%M:%S') - schedule_app.py is running"
														
 
															 else
														
 
															-    echo "$(date '+%Y-%m-%d %H:%M:%S') - trying to restart article_association_task.py"
														
 
															+    echo "$(date '+%Y-%m-%d %H:%M:%S') - trying to restart schedule_app.py"
														
 
															     # 切换到指定目录
														
 
															     cd /root/luojunhui/LongArticlesJob
														
@@ -21,6 +21,6 @@ else
 
															     conda activate tasks
														
 
															     # 在后台运行 Python 脚本并重定向日志输出
														
 
															-    nohup python3 article_association_task.py >> "${LOG_FILE}" 2>&1 &
														
 
															-    echo "$(date '+%Y-%m-%d %H:%M:%S') - successfully restarted article_association_task.py"
														
 
															+    nohup python3 schedule_app.py >> "${LOG_FILE}" 2>&1 &
														
 
															+    echo "$(date '+%Y-%m-%d %H:%M:%S') - successfully restarted schedule_app.py"
														
 
															 fi
														
--- a/tasks/publish_tasks/cold_start_publish_daily.py
+++ b/tasks/publish_tasks/cold_start_publish_daily.py
@@ -0,0 +1,47 @@
 
															+import json
														
 
															+import traceback
														
 
															+
														
 
															+from applications import bot
														
 
															+from applications.db import DatabaseConnector
														
 
															+from config import long_articles_config
														
 
															+from coldStartTasks.publish.publish_single_video_pool_videos import PublishSingleVideoPoolVideos
														
 
															+
														
 
															+
														
 
															+class ColdStartPublishDailyTask:
														
 
															+
														
 
															+    def __init__(self):
														
 
															+        self.db_client = DatabaseConnector(long_articles_config)
														
 
															+        self.db_client.connect()
														
 
															+
														
 
															+
														
 
															+    def publish_articles_from_article_pool(self):
														
 
															+        """
														
 
															+        从 meta_article_pool 表中获取文章，发布到 AIGC 平台
														
 
															+        """
														
 
															+        # publish_article_task = CategoryColdStartTask(db_client=self.db_client)
														
 
															+        # 执行浸提头条品类发布
														
 
															+
														
 
															+
														
 
															+        pass
														
 
															+
														
 
															+
														
 
															+    def publish_articles_from_video_pool(self):
														
 
															+        """
														
 
															+        从 meta_video_pool 表中获取视频，发布到 AIGC 平台
														
 
															+        """
														
 
															+        try:
														
 
															+            publish_single_video_pool_videos = PublishSingleVideoPoolVideos()
														
 
															+            publish_single_video_pool_videos.deal()
														
 
															+        except Exception as e:
														
 
															+            bot(
														
 
															+                title="视频内容池任务创建失败",
														
 
															+                detail={
														
 
															+                    "error": str(e),
														
 
															+                    "error_msg": traceback.format_exc()
														
 
															+                }
														
 
															+            )
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
--- a/update_published_articles_v2.py
+++ b/update_published_articles_v2.py
@@ -1,34 +0,0 @@
 
															-"""
														
 
															-@author: luojunhui
														
 
															-"""
														
 
															-from argparse import ArgumentParser
														
 
															-
														
 
															-from tasks.update_published_articles_read_detail import UpdatePublishedArticlesReadDetail
														
 
															-
														
 
															-
														
 
															-def main():
														
 
															-    """
														
 
															-    update mini program detail main
														
 
															-    :return:
														
 
															-    """
														
 
															-    parser = ArgumentParser()
														
 
															-    parser.add_argument("--run-date",
														
 
															-                        help="Run only once for date in format of %Y-%m-%d. \
														
 
															-                            If no specified, run as daily jobs.")
														
 
															-    args = parser.parse_args()
														
 
															-
														
 
															-    update_publish_articles_task = UpdatePublishedArticlesReadDetail()
														
 
															-    update_publish_articles_task.init_database()
														
 
															-
														
 
															-    if args.run_date:
														
 
															-        update_publish_articles_task.update_job(args.run_date)
														
 
															-        update_publish_articles_task.check_job(args.run_date)
														
 
															-    else:
														
 
															-        update_publish_articles_task.update_job()
														
 
															-        update_publish_articles_task.check_job()
														
 
															-
														
 
															-    update_publish_articles_task.get_article_detail_job()
														
 
															-
														
 
															-
														
 
															-if __name__ == '__main__':
														
 
															-    main()