Переглянути джерело

Merge branch '2025-02-05-change-video-limit' of Server/title_with_video into 2025-01-31-luojunhui-match-desc

luojunhui 2 місяців тому
батько
коміт
49af23056a

+ 4 - 1
applications/const/task_const.py

@@ -48,6 +48,9 @@ class HistoryContentIdTaskConst:
     VIDEO_UNSAFE = 1
     VIDEO_SAFE = 0
 
+    # 最低视频数量
+    MIN_VIDEO_NUM = 1
+
 
 class NewContentIdTaskConst(HistoryContentIdTaskConst):
     """
@@ -70,7 +73,7 @@ class NewContentIdTaskConst(HistoryContentIdTaskConst):
     TASK_PROCESSING_TIMEOUT = 3600
 
     # 匹配最少视频数量
-    MIN_MATCH_VIDEO_NUM = 3
+    MIN_MATCH_VIDEO_NUM = 1
 
     # long_articles_text中,KIMI处理状态
     KIMI_INIT_STATUS = 0

+ 2 - 2
applications/functions/kimi.py

@@ -28,7 +28,7 @@ class KimiServer(object):
         contents = params['article_text']
         trace_id = params['content_id']
         try:
-            kimi_title = await cls.kimi_title(title, ai_model=deep_seek)
+            kimi_title = await cls.kimi_title(title, ai_model=moon_shot)
             # 判断kimi 标题是否安全
             title_score = await cls.get_kimi_title_safe_score(kimi_title, ai_model=moon_shot)
             kimi_green_title = None
@@ -64,7 +64,7 @@ class KimiServer(object):
 
         kimi_title = kimi_title.replace("'", "").replace('"', "").replace("\\", "")
         try:
-            kimi_info = await cls.kimi_mining(contents, ai_model=deep_seek)
+            kimi_info = await cls.kimi_mining(contents, ai_model=moon_shot)
         except Exception as e:
             logging(
                 code="4002",

+ 4 - 2
applications/spider/__init__.py

@@ -130,12 +130,13 @@ async def search_videos_from_web(info, gh_id_map, db_client):
             mention=False
         )
 
+    success_match_video_count = 0
     for recall_obj in ranked_list:
         if recall_obj:
             platform = recall_obj['platform']
             recall_video = recall_obj['result']
             score = recall_obj['score']
-            # 过滤掉nlp分低于0.45的
+            # 过滤掉nlp分低于 server_const.NLP_SIMILARITY_THRESHOLD 的
             if score < server_const.NLP_SIMILARITY_THRESHOLD:
                 continue
 
@@ -150,4 +151,5 @@ async def search_videos_from_web(info, gh_id_map, db_client):
                     db_client=db_client,
                     similarity_score=score
                 )
-    return len(ranked_list)
+                success_match_video_count += 1
+    return success_match_video_count

+ 1 - 1
tasks/history_task.py

@@ -54,7 +54,7 @@ class historyContentIdTask(object):
                 from {self.article_crawler_video_table}
                 where download_status = {self.const.VIDEO_DOWNLOAD_SUCCESS_STATUS}
                 group by content_id
-            ) VID on ART.content_id = VID.content_id and VID.cnt >= 3
+            ) VID on ART.content_id = VID.content_id and VID.cnt >= {self.const.MIN_VIDEO_NUM}
             WHERE ART.content_status = {self.const.TASK_INIT_STATUS} and ART.process_times <= {self.const.TASK_MAX_PROCESS_TIMES}
                 AND ART.publish_flag = {self.publish_flag}
             -- ORDER BY ART.flow_pool_level, ART.request_timestamp