luojunhui преди 8 месеца
родител
ревизия
968ea7dd84
променени са 2 файла, в които са добавени 124 реда и са изтрити 2 реда
  1. 4 2
      coldStartTasks/multi_modal/generate_text_from_video.py
  2. 120 0
      tasks/article_summary_task.py

+ 4 - 2
coldStartTasks/multi_modal/generate_text_from_video.py

@@ -147,7 +147,8 @@ class GenerateTextFromVideo(object):
                                     update_sql,
                                     params=(2, video_text, state, task['pq_vid'])
                                 )
-                                os.remove(video_local_path)
+                                if os.path.exists(video_local_path):
+                                    os.remove(video_local_path)
                                 tqdm.write("video transform to text success, delete local file, sleep 1 min...")
                                 task_list.remove(task)
                         except Exception as e:
@@ -168,7 +169,8 @@ class GenerateTextFromVideo(object):
                             update_sql,
                             params=(99, state, task['pq_vid'])
                         )
-                        os.remove(video_local_path)
+                        if os.path.exists(video_local_path):
+                            os.remove(video_local_path)
                         tqdm.write("video process failed, delete local file")
                         continue
                 time.sleep(10)

+ 120 - 0
tasks/article_summary_task.py

@@ -0,0 +1,120 @@
+"""
+@author: luojunhui
+"""
+from pymysql.cursors import DictCursor
+
+from applications.api import deep_seek_api
+from applications.db import DatabaseConnector
+from config import long_articles_config
+
+
+def generate_prompt(text):
+    """
+    Build the LLM prompt for summarising one article.
+
+    The prompt embeds ``text`` and asks for a teaser summary of roughly 100
+    characters that ends with one or two sentences steering the reader to
+    the video shown above the summary.
+
+    :param text: article / video text to be summarised
+    :return: the fully formatted prompt string
+    """
+    return f"""
+        你是1个优秀的公众号文章写作大师,我对你有以下要求
+        文章: {text}
+        1.请仔细阅读以上公众号文章,挑选文章中最吸引人的情节或话题,总结为100字左右文章精彩总结(字数计算包括标点符号)。
+        句子段落之间以悬念承接,可以吸引读者往下读第二句。
+        2.在这100字内容的结尾处,增加1-2句话的引导,引导大家去观看上面的视频了解详情。注意是点击上面的视频,不是下面的视频。
+
+        你最终输出一段总结内容,不用加标题或者主题,也不用写第几段、多少字这样的话。整体的语言风格要口语化、直接点,要让60岁以上的老年人能看懂、能共情。人的名字尽量用全名,不用简称。
+    """
+
+
+class ArticleSummaryTask(object):
+    """
+    Article summary task.
+
+    Picks videos whose text has already been extracted, asks the LLM for a
+    short summary of that text and stores it on
+    ``publish_single_video_source``. Progress is tracked in the
+    ``extract_status`` column using the codes below.
+    """
+    # extract_status codes used by this task
+    INIT_STATUS = 1          # selected by get_task_list, waiting for a summary
+    PROCESSING_STATUS = 101  # claimed by a worker
+    SUCCESS_STATUS = 2       # summary_text stored
+    FAIL_STATUS = 99         # no summary could be produced
+
+    def __init__(self):
+        # Populated by connect_db(); stays None until then.
+        self.db_client = None
+
+    def connect_db(self):
+        """
+        Create the database connector from ``long_articles_config`` and connect.
+        """
+        self.db_client = DatabaseConnector(db_config=long_articles_config)
+        self.db_client.connect()
+
+    def get_task_list(self):
+        """
+        Fetch up to 20 pending tasks.
+
+        :return: rows with ``video_text`` and ``audit_video_id`` keys
+            (fetched with ``DictCursor``)
+        """
+        select_sql = """
+            select t1.video_text, t2.audit_video_id
+            from video_content_understanding t1 join publish_single_video_source t2 on t1.pq_vid = t2.audit_video_id
+            where t1.status = 2 and t2.bad_status = 0 and t2.extract_status = 1 limit 20;
+        """
+        task_list = self.db_client.fetch(select_sql, cursor_type=DictCursor)
+        return task_list
+
+    def _finish_task_as_failed(self, audit_video_id):
+        """
+        Move a claimed row from PROCESSING to FAIL; return the save() result.
+        """
+        update_sql = """
+                update publish_single_video_source 
+                set extract_status = %s
+                where audit_video_id = %s and extract_status = %s;
+            """
+        return self.db_client.save(
+            query=update_sql,
+            params=(self.FAIL_STATUS, audit_video_id, self.PROCESSING_STATUS)
+        )
+
+    def process_each_task(self, task):
+        """
+        Summarise one video and persist the outcome.
+
+        task: {
+            "video_text": "video content",
+            "audit_video_id": "video id"
+        }
+
+        The row is first claimed by moving ``extract_status`` from INIT to
+        PROCESSING. When that update reports no affected row the task is
+        skipped. ``save()`` is treated as returning the affected-row count,
+        as the original variable naming suggests.
+
+        :raises Exception: whatever ``deep_seek_api`` raises; the row is
+            marked FAIL before the exception propagates
+        """
+        video_text = task["video_text"]
+        audit_video_id = task["audit_video_id"]
+
+        # Claim the row. Guarding on the current status keeps a row that was
+        # already claimed or finished elsewhere from being processed again.
+        lock_sql = """
+            update publish_single_video_source set extract_status = %s
+            where audit_video_id = %s and extract_status = %s;
+        """
+        affected_rows = self.db_client.save(
+            query=lock_sql,
+            params=(self.PROCESSING_STATUS, audit_video_id, self.INIT_STATUS)
+        )
+        if not affected_rows:
+            return
+
+        # Build the prompt and ask the model for a summary.
+        prompt = generate_prompt(video_text)
+        try:
+            response = deep_seek_api(model="DeepSeek-R1", prompt=prompt)
+        except Exception:
+            # Do not leave the row stuck in PROCESSING: nothing selects that
+            # status again, so it would never be retried or reported.
+            self._finish_task_as_failed(audit_video_id)
+            raise
+
+        if response:
+            update_sql = """
+                update publish_single_video_source 
+                set extract_status = %s, summary_text = %s
+                where audit_video_id = %s and extract_status = %s;
+            """
+            affected_rows = self.db_client.save(
+                query=update_sql,
+                params=(
+                    self.SUCCESS_STATUS,
+                    response,
+                    audit_video_id,
+                    self.PROCESSING_STATUS,
+                )
+            )
+            print(affected_rows)
+        else:
+            # Empty / falsy model output counts as a failure.
+            affected_rows = self._finish_task_as_failed(audit_video_id)
+            print(affected_rows)
+
+    def deal(self):
+        """
+        Fetch one batch of tasks and process them one by one.
+        """
+        task_list = self.get_task_list()
+        for task in task_list:
+            try:
+                self.process_each_task(task)
+            except Exception as e:
+                # One failing task must not abort the rest of the batch.
+                print(e)
+
+
+if __name__ == '__main__':
+    # Script entry point: build the task, connect to the database, then
+    # process a single batch of pending summaries.
+    summary_task = ArticleSummaryTask()
+    summary_task.connect_db()
+    summary_task.deal()
+
+
+
+
+