Prechádzať zdrojové kódy

Merge branch '2025-02-27-backupnlp-improve' of luojunhui/LongArticlesJob into master

luojunhui pred 1 mesiacom
rodič
commit
b51a0879a9
2 zmenené súbory, 54 pridaní a 26 odstránení
  1. 27 19
      applications/api/nlp_api.py
  2. 27 7
      config/crontab_backup

+ 27 - 19
applications/api/nlp_api.py

@@ -2,9 +2,10 @@
 @author: luojunhui
 """
 import requests
+import traceback
 from requests.exceptions import RequestException, JSONDecodeError
 
-from applications.feishuBotApi import bot
+from applications.aliyunLogApi import log
 
 
 def similarity_between_title_list(target_title_list: list[str], base_title_list: list[str]) -> list[list[float]]:
@@ -31,24 +32,29 @@ def similarity_between_title_list(target_title_list: list[str], base_title_list:
         if response.status_code != 200:
             response = requests.post(url_backup, json=body, timeout=120)
     except RequestException as e:
-        bot(
-            title='NLP API 网络异常',
-            detail={
-                "error_type": type(e).__name__,
-                "error_msg": str(e)
-            },
-            mention=False
+        log(
+            task="nlp",
+            function="similarity_between_title_list",
+            status="fail",
+            message="nlp server web error",
+            data={
+                "e": str(e),
+                "error_msg": traceback.format_exc()
+            }
         )
-        return []
+        # use back up
+        response = requests.post(url_backup, json=body, timeout=120)
 
     if response.status_code != 200:
-        bot(
-            title='NLP API 业务异常',
-            detail={
+        log(
+            task="nlp",
+            function="similarity_between_title_list",
+            status="fail",
+            message='nlp server request error',
+            data={
                 "status_code": response.status_code,
                 "response_text": response.text[:200]  # 截取部分内容避免过大
-            },
-            mention=False
+            }
         )
         return []
 
@@ -56,13 +62,15 @@ def similarity_between_title_list(target_title_list: list[str], base_title_list:
         response_json = response.json()
         score_array = response_json['score_list_list']
     except (JSONDecodeError, KeyError) as e:
-        bot(
-            title='NLP响应数据异常',
-            detail={
+        log(
+            task="nlp",
+            function="similarity_between_title_list",
+            status="fail",
+            message='nlp server response error',
+            data={
                 "error_type": type(e).__name__,
                 "raw_response": response.text[:200]
-            },
-            mention=False
+            }
         )
         return []
 

+ 27 - 7
config/crontab_backup

@@ -1,11 +1,23 @@
-# 凌晨1点30执行更新小程序信息任务
-30 1 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_minigram_info_daily.sh
+# 每15分钟执行一次今日头条推荐流抓取
+*/15 * * * * bash /root/luojunhui/LongArticlesJob/sh/run_toutiao_recommend.sh
+
+# 每10分钟执行一次从aigc系统获取发布文章
+*/10 * * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_article_info_from_aigc.sh
+
+# 每10分钟执行一次标题相似度计算任务
+*/10 * * * * bash /root/luojunhui/LongArticlesJob/sh/run_title_similarity_task.sh
+
+# 凌晨2点30执行更新小程序信息任务
+30 2 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_minigram_info_daily.sh
 
 # 每天上午10点30执行文章退场 && 晋升任务
 30 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_article_title_exit_v1.sh
 
 # 每天凌晨1点执行账号冷启动任务
-0 4 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_cold_start_daily.sh
+0 1 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_cold_start_daily.sh
+
+# 每日上午9点执行账号联想任务
+0 9 * * * bash /root/luojunhui/LongArticlesJob/sh/run_account_association.sh
 
 # 每天 10 点执行前一天的阅读率均值代码
 0 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_account_read_rate_avg.sh
@@ -13,18 +25,24 @@
 # 每天10点40执行阅读均值任务
 40 10 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_account_avg_v3.sh
 
+# 每天11点执行文章联想任务
+0 11 * * * bash /root/luojunhui/LongArticlesJob/sh/run_article_association.sh
+
 # 每小时执行一次校验视频状态
 20 * * * * bash /root/luojunhui/LongArticlesJob/sh/run_check_video_status_hourly.sh
 
-# 每天凌晨4:30, 8:30, 15:30执行视频发布和审核流程
-30 4,8,15 * * * bash /root/luojunhui/LongArticlesJob/sh/run_video_publish_and_audit.sh
+# 每天凌晨4:30、下午3:30执行视频发布和审核流程
+30 4,15 * * * bash /root/luojunhui/LongArticlesJob/sh/run_video_publish_and_audit.sh
 
 # 每天 上午8:30, 下午2:20, 晚上8:50执行
 
 30 8 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_published_articles_daily.sh
-20 13 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_published_articles_daily.sh
+20 14 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_published_articles_daily.sh
 50 20 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_published_articles_daily.sh
 
+# 每天上午9点,下午2点,晚上9点执行v2代码
+# 0 9,14,21 * * * bash /root/luojunhui/LongArticlesJob/sh/run_update_published_articles_daily_v2.sh
+
 
 # 每天上午 9:30 点,下午 2 点,晚上 7 点执行下架视频任务
 
@@ -37,8 +55,10 @@
 
 # 每天上午10点, 下午4点, 晚上8点
 
-30 9,14 * * * bash /root/luojunhui/LongArticlesJob/sh/published_articles_monitor.sh
+0 10,16,20 * * * bash /root/luojunhui/LongArticlesJob/sh/published_articles_monitor.sh
 
+# 每晚11点开始执行百度视频
+0 23 * * * bash /root/luojunhui/LongArticlesJob/sh/run_baidu_video_crawler.sh
 
 # check kimo balance hourly