Browse Source

add other tasks

luojunhui 1 month ago
parent
commit
fa46b8c989

+ 0 - 56
kimi_balance_monitor.py

@@ -1,56 +0,0 @@
-"""
-@author: luojunhui
-"""
-import requests
-import traceback
-
-from applications import bot
-from applications.decoratorApi import retryOnTimeout
-
-BALANCE_LIMIT_THRESHOLD = 200.0
-
-
-@retryOnTimeout(retries=5, delay=5)
-def check_kimi_balance():
-    """
-    校验kimi余额
-    :return:
-    """
-    url = "https://api.moonshot.cn/v1/users/me/balance"
-
-    payload = {}
-    headers = {
-        'Authorization': 'Bearer sk-5DqYCa88kche6nwIWjLE1p4oMm8nXrR9kQMKbBolNAWERu7q'
-    }
-    response = requests.request("GET", url, headers=headers, data=payload, timeout=10)
-    if response.status_code == 200:
-        response_json = response.json()
-        try:
-            balance = response_json['data']['available_balance']
-            if balance < BALANCE_LIMIT_THRESHOLD:
-                bot(
-                    title="kimi余额小于 {} 块".format(BALANCE_LIMIT_THRESHOLD),
-                    detail={
-                        "balance": balance
-                    }
-                )
-        except Exception as e:
-            error_stack = traceback.format_exc()
-            bot(
-                title="kimi余额接口处理失败,数据结构异常",
-                detail={
-                    "error": str(e),
-                    "error_msg": error_stack
-                }
-            )
-    else:
-        bot(
-            title="kimi余额接口调用失败",
-            detail={
-                "response": response.text
-            }
-        )
-
-
-if __name__ == '__main__':
-    check_kimi_balance()

+ 50 - 25
long_articles_job.py

@@ -1,16 +1,22 @@
 from argparse import ArgumentParser
 
+from cold_start.crawler.baidu import BaiduVideoCrawler
+from tasks.ai_tasks import run_title_similarity_task
 from tasks.crawler_tasks.crawler_video.crawler_piaoquan_videos import (
     CrawlerPiaoQuanVideos,
 )
-from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import CrawlerSohuHotVideos
+from tasks.crawler_tasks.crawler_video.crawler_toutiao_videos import (
+    CrawlerToutiaoAccountVideos,
+)
 from tasks.crawler_tasks.crawler_video.crawler_sohu_videos import (
     CrawlerSohuRecommendVideos,
+    CrawlerSohuHotVideos,
 )
 from tasks.crawler_tasks.crawler_video.crawler_sph_videos import (
     CrawlerChannelAccountVideos,
 )
 from tasks.crawler_tasks.crawler_video.crawler_gzh_videos import CrawlerGzhMetaVideos
+
 from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishRecordManager
 from tasks.data_tasks.fwh_data_recycle import SaveFwhDataToDatabase
 from tasks.data_tasks.fwh_data_recycle import FwhGroupPublishMonitor
@@ -20,24 +26,41 @@ from tasks.publish_tasks.top_article_generalize import (
 )
 
 
-def run_piaoquan_video_crawler():
-    crawler = CrawlerPiaoQuanVideos()
-    crawler.deal()
+class CrawlerTasks:
+    @classmethod
+    def run_piaoquan_video_crawler(cls):
+        crawler = CrawlerPiaoQuanVideos()
+        crawler.deal()
+
+    @classmethod
+    def run_sohu_video_crawler(cls):
+        # step1, crawl sohu hot videos
+        crawler_sohu_hot_videos = CrawlerSohuHotVideos()
+        crawler_sohu_hot_videos.deal()
 
+        # step2, crawl sohu recommend videos
+        crawler_sohu_recommend_videos = CrawlerSohuRecommendVideos()
+        crawler_sohu_recommend_videos.deal()
 
-def run_sohu_video_crawler():
-    # step1, crawl sohu hot videos
-    crawler_sohu_hot_videos = CrawlerSohuHotVideos()
-    crawler_sohu_hot_videos.deal()
+    @classmethod
+    def run_sph_video_crawler(cls):
+        crawler_channel_account_videos = CrawlerChannelAccountVideos()
+        crawler_channel_account_videos.deal()
 
-    # step2, crawl sohu recommend videos
-    crawler_sohu_recommend_videos = CrawlerSohuRecommendVideos()
-    crawler_sohu_recommend_videos.deal()
+    @classmethod
+    def crawler_gzh_meta_videos(cls):
+        task = CrawlerGzhMetaVideos()
+        task.deal()
 
+    @classmethod
+    def run_toutiao_video_crawler(cls):
+        crawler = CrawlerToutiaoAccountVideos()
+        crawler.deal()
 
-def run_sph_video_crawler():
-    crawler_channel_account_videos = CrawlerChannelAccountVideos()
-    crawler_channel_account_videos.deal()
+    @classmethod
+    def run_baidu_video_crawler(cls):
+        task = BaiduVideoCrawler()
+        task.deal()
 
 
 def run_fwh_data_manager():
@@ -59,15 +82,11 @@ def run_top_article_generalize_from_article_pool():
     task.deal()
 
 
-def crawler_gzh_meta_videos():
-    task = CrawlerGzhMetaVideos()
-    task.deal()
-
-
 def main():
     """
     run long_articles_job
     """
+    crawler = CrawlerTasks()
     parser = ArgumentParser()
     parser.add_argument("--task_name", help="which task you want to run")
     parser.add_argument("--run_date", help="task specify run date")
@@ -81,19 +100,25 @@ def main():
     else:
         match task_name:
             case "run_piaoquan_video_crawler":
-                run_piaoquan_video_crawler()
+                crawler.run_piaoquan_video_crawler()
             case "run_sohu_video_crawler":
-                run_sohu_video_crawler()
+                crawler.run_sohu_video_crawler()
+            case "run_sph_video_crawler":
+                crawler.run_sph_video_crawler()
+            case "crawler_gzh_meta_videos":
+                crawler.crawler_gzh_meta_videos()
+            case "run_toutiao_video_crawler":
+                crawler.run_toutiao_video_crawler()
+            case "run_baidu_video_crawler":
+                crawler.run_baidu_video_crawler()
             case "run_check_kimi_balance":
                 check_kimi_balance()
             case "run_fwh_data_manager":
                 run_fwh_data_manager()
-            case "run_sph_video_crawler":
-                run_sph_video_crawler()
+            case "run_title_similarity_task":
+                run_title_similarity_task()
             case "top_article_generalize":
                 run_top_article_generalize_from_article_pool()
-            case "crawler_gzh_meta_videos":
-                crawler_gzh_meta_videos()
             case _:
                 print("task_name cannot be None")
 

+ 0 - 8
run_baidu_video_crawler.py

@@ -1,8 +0,0 @@
-"""
-@author: luojunhui
-"""
-from cold_start.crawler.baidu import BaiduVideoCrawler
-
-if __name__ == '__main__':
-    task = BaiduVideoCrawler()
-    task.deal()

+ 16 - 4
sh/run_long_articles_job.sh

@@ -34,13 +34,25 @@ on_failure(){
 # 语法: "分 时 日 月 周|任务名|日志模板"
 # 支持 *、*/n、a-b、a,b,c 以及它们组合
 TASKS=(
-  "0 3 * * *|run_sph_video_crawler|${LOG_DIR}/run_sph_video_crawler/%Y-%m-%d.log"
+  # 视频号视频抓取
+  "0 3,15 * * *|run_sph_video_crawler|${LOG_DIR}/run_sph_video_crawler/%Y-%m-%d.log"
+  # 票圈站内视频抓取
   "0 6 * * *|run_piaoquan_video_crawler|${LOG_DIR}/run_piaoquan_video_crawler/%Y-%m-%d.log"
+  # 搜狐视频抓取
   "10 6 * * *|run_sohu_video_crawler|${LOG_DIR}/run_sohu_video_crawler/%Y-%m-%d.log"
+  # top文章泛化作为供给
   "20 11 * * *|top_article_generalize|${LOG_DIR}/top_article_generalize/%Y-%m-%d.log"
-  "0 15 * * *|run_sph_video_crawler|${LOG_DIR}/run_sph_video_crawler/%Y-%m-%d.log"
-  # 示例:每分钟执行
-  # "* * * * *|heartbeat|${LOG_DIR}/heartbeat/%Y-%m-%d.log"
+  # 校验kimi剩余额度
+  "30 * * * *|run_check_kimi_balance|${LOG_DIR}/run_check_kimi_balance/%Y-%m-%d.log"
+  # 服务号数据回收
+  "0 11,17 * * *|run_fwh_data_manager|${LOG_DIR}/run_fwh_data_manager/%Y-%m-%d.log"
+  # 标题相似度任务
+  "*/10 * * * *|run_title_similarity_task|${LOG_DIR}/run_title_similarity_task/%Y-%m-%d.log"
+  # 头条视频抓取
+  "0 4,16 * * *|run_toutiao_video_crawler|${LOG_DIR}/run_toutiao_video_crawler/%Y-%m-%d.log"
+  # 百度视频抓取
+  "20 0,12 * * *|run_baidu_video_crawler|${LOG_DIR}/run_baidu_video_crawler/%Y-%m-%d.log"
 )
 
 ###################### 工具函数 ######################

+ 1 - 0
tasks/ai_tasks/__init__.py

@@ -0,0 +1 @@
+from .title_similarity_score_task import run_title_similarity_task

+ 1 - 1
title_similarity_score_task.py → tasks/ai_tasks/title_similarity_score_task.py

@@ -6,7 +6,7 @@ from applications import bot
 from cold_start.filter.title_similarity_task import ColdStartTitleSimilarityTask
 
 
-if __name__ == '__main__':
+def run_title_similarity_task():
     batch_size = 3000
     task = ColdStartTitleSimilarityTask()
     task.init_database()

+ 0 - 0
run_video_understanding_with_google.py → tasks/not_used_tasks/run_video_understanding_with_google.py


+ 0 - 10
toutiao_video_crawler.py

@@ -1,10 +0,0 @@
-"""
-@author: luojunhui
-"""
-
-from tasks.crawler_tasks.crawler_video.crawler_toutiao_videos import CrawlerToutiaoAccountVideos
-
-
-if __name__ == '__main__':
-    crawler = CrawlerToutiaoAccountVideos()
-    crawler.deal()