Browse Source

Merge branch '2024-11-13-luojunhui-fix-kimi' of Server/title_with_video into 2024-09-23newDbTasks

luojunhui 5 months ago
parent
commit
6999c64cf3

+ 1 - 1
applications/functions/kimi.py

@@ -205,7 +205,7 @@ class KimiServer(object):
             ],
             model="moonshot-v1-32k",
         )
-        response = chat_completion.choices[0].message.content.replace('```json', '').replace('```', '')
+        response = chat_completion.choices[0].message.content.replace('```json', '').replace('```', '').replace('“', '"').replace('”', '"')
         try:
             response = json.loads(response)
             return response

+ 2 - 2
applications/search/dy_search.py

@@ -17,7 +17,7 @@ def douyin_search(keyword, sensitive_words, trace_id):
     :param keyword: the words needs to be searched
     :return:
     """
-    url = "http://8.217.190.241:8888/crawler/dou_yin/top_hub_content"
+    url = "http://crawler-cn.aiddit.com/crawler/dou_yin/top_hub_content"
     payload = json.dumps({
         "keyword": keyword,
         "category": "全部",
@@ -78,7 +78,7 @@ def douyin_detail(video_id):
     :param video_id:
     :return:
     """
-    url = "http://8.217.190.241:8888/crawler/dou_yin/detail"
+    url = "http://crawler-cn.aiddit.com/crawler/dou_yin/detail"
     payload = json.dumps({
         "content_id": video_id
     })

+ 6 - 0
server/api/response.py

@@ -270,6 +270,12 @@ class Response(object):
                     "traceId": self.trace_id,
                     "miniprogramList": card_list
                 }
+            case 95:
+                return {
+                    "traceId": self.trace_id,
+                    "code": 95,
+                    "error": "该文章被kimi识别为高风险文章,不处理"
+                }
             case 96:
                 return {
                     "traceId": self.trace_id,

+ 33 - 1
tasks/newContentIdTask.py

@@ -13,6 +13,7 @@ from applications.functions.common import shuffle_list
 from applications.functions.kimi import KimiServer
 from applications.spider import search_videos_from_web
 from applications.etl_function import *
+from applications.feishu import bot
 
 
 class NewContentIdTask(object):
@@ -26,6 +27,7 @@ class NewContentIdTask(object):
     TASK_PUBLISHED_STATUS = 4
     TASK_PROCESSING_STATUS = 101
     TASK_FAIL_STATUS = 99
+    KIMI_ILLEGAL_STATUS = 95
     ARTICLE_TEXT_TABLE_ERROR = 98
     TASK_MAX_PROCESS_TIMES = 3
 
@@ -676,6 +678,9 @@ class NewContentIdTask(object):
         # time.sleep(5) # 测试多个进程操作同一个 task 的等待时间
         kimi_result = await self.kimi_task(params)
         trace_id = params['trace_id']
+        process_times = params['process_times']
+        content_id = params['content_id']
+        print(kimi_result)
         if kimi_result:
             # 等待 kimi 操作执行完成之后,开始执行 spider_task
             print("kimi success")
@@ -731,8 +736,35 @@ class NewContentIdTask(object):
             logging(
                 code="6001",
                 info="kimi 处理失败",
-                trace_id=params['trace_id']
+                trace_id=trace_id
             )
+            if process_times >= self.TASK_MAX_PROCESS_TIMES:
+                logging(
+                    code="6011",
+                    info="kimi处理次数达到上限, 放弃处理",
+                    trace_id=trace_id
+                )
+                # 将相同的content_id && content_status = 0的状态修改为kimi 失败状态
+                update_sql = f"""
+                    UPDATE {self.article_match_video_table}
+                    SET content_status = %s
+                    WHERE content_id = %s and content_status = %s;
+                """
+                affected_rows = await self.mysql_client.async_insert(
+                    sql=update_sql,
+                    params=(
+                        self.KIMI_ILLEGAL_STATUS,
+                        content_id,
+                        self.TASK_INIT_STATUS
+                    )
+                )
+                bot(
+                    title="KIMI 处理失败",
+                    detail={
+                        "content_id": content_id,
+                        "affected_rows": affected_rows
+                    }
+                )
 
     async def process_task(self, params):
         """