|
@@ -1,10 +1,14 @@
|
|
|
"""
|
|
|
@author: luojunhui
|
|
|
"""
|
|
|
+
|
|
|
+import time
|
|
|
+import traceback
|
|
|
+
|
|
|
from pymysql.cursors import DictCursor
|
|
|
from tqdm import tqdm
|
|
|
|
|
|
-from applications.api import get_response_by_deep_seek_api
|
|
|
+from applications.api import fetch_deepseek_response
|
|
|
from applications.const import VideoToTextConst
|
|
|
from applications.db import DatabaseConnector
|
|
|
from config import long_articles_config
|
|
@@ -32,6 +36,7 @@ class ArticleSummaryTask(object):
|
|
|
"""
|
|
|
文章总结任务
|
|
|
"""
|
|
|
+
|
|
|
def __init__(self):
|
|
|
self.db_client = None
|
|
|
|
|
@@ -47,100 +52,116 @@ class ArticleSummaryTask(object):
|
|
|
获取任务列表
|
|
|
"""
|
|
|
select_sql = f"""
|
|
|
- select t1.video_text, t2.audit_video_id
|
|
|
- from video_content_understanding t1
|
|
|
- join publish_single_video_source t2
|
|
|
- on t1.pq_vid = t2.audit_video_id
|
|
|
- where t1.status = {const.VIDEO_UNDERSTAND_SUCCESS_STATUS}
|
|
|
- and t2.bad_status = {const.ARTICLE_GOOD_STATUS}
|
|
|
- and t2.extract_status = {const.EXTRACT_INIT_STATUS};
|
|
|
+ select id, video_text
|
|
|
+ from video_content_understanding
|
|
|
+ where summary_status = {const.SUMMARY_INIT_STATUS} and status = {const.VIDEO_UNDERSTAND_SUCCESS_STATUS}
|
|
|
+ limit {const.SUMMARY_BATCH_SIZE};
|
|
|
"""
|
|
|
task_list = self.db_client.fetch(select_sql, cursor_type=DictCursor)
|
|
|
return task_list
|
|
|
|
|
|
- def process_each_task(self, task):
|
|
|
+ def rollback_lock_tasks(self):
|
|
|
"""
|
|
|
- 处理每个任务
|
|
|
+ rollback tasks which have been locked for a long time
|
|
|
"""
|
|
|
- video_text = task["video_text"]
|
|
|
- audit_video_id = task["audit_video_id"]
|
|
|
- # 开始处理,将extract_status更新为101
|
|
|
+ now_timestamp = int(time.time())
|
|
|
+ timestamp_threshold = now_timestamp - const.MAX_PROCESSING_TIME
|
|
|
update_sql = f"""
|
|
|
- update publish_single_video_source
|
|
|
- set extract_status = %s
|
|
|
- where audit_video_id = %s and extract_status = %s;
|
|
|
+ update video_content_understanding
|
|
|
+ set summary_status = %s
|
|
|
+ where summary_status = %s and status_update_timestamp < %s;
|
|
|
"""
|
|
|
- affected_rows = self.db_client.save(
|
|
|
+ rollback_rows = self.db_client.save(
|
|
|
query=update_sql,
|
|
|
- params=(const.EXTRACT_PROCESSING_STATUS, audit_video_id, const.EXTRACT_INIT_STATUS)
|
|
|
+ params=(const.SUMMARY_INIT_STATUS, const.SUMMARY_LOCK, timestamp_threshold),
|
|
|
+ )
|
|
|
+
|
|
|
+ return rollback_rows
|
|
|
+
|
|
|
+ def handle_task_execution(self, task):
|
|
|
+ """
|
|
|
+ :param task: keys: [id, video_text]
|
|
|
+ """
|
|
|
+ task_id = task["id"]
|
|
|
+ video_text = task["video_text"]
|
|
|
+
|
|
|
+ # Lock Task
|
|
|
+ affected_rows = self.update_task_status(
|
|
|
+ task_id, const.SUMMARY_INIT_STATUS, const.SUMMARY_LOCK
|
|
|
)
|
|
|
if not affected_rows:
|
|
|
return
|
|
|
+
|
|
|
try:
|
|
|
- # 生成prompt
|
|
|
+ # generate prompt
|
|
|
prompt = generate_prompt(video_text)
|
|
|
- response = get_response_by_deep_seek_api(model="DeepSeek-R1", prompt=prompt)
|
|
|
- if response:
|
|
|
- update_sql = f"""
|
|
|
- update publish_single_video_source
|
|
|
- set extract_status = %s, summary_text = %s
|
|
|
- where audit_video_id = %s and extract_status = %s;
|
|
|
- """
|
|
|
- affected_rows = self.db_client.save(
|
|
|
- query=update_sql,
|
|
|
- params=(
|
|
|
- const.EXTRACT_SUCCESS_STATUS,
|
|
|
- response.strip(),
|
|
|
- audit_video_id,
|
|
|
- const.EXTRACT_PROCESSING_STATUS
|
|
|
- )
|
|
|
- )
|
|
|
- print(affected_rows)
|
|
|
+
|
|
|
+ # get result from deep seek AI
|
|
|
+ result = fetch_deepseek_response(model="DeepSeek-R1", prompt=prompt)
|
|
|
+ if result:
|
|
|
+ # set as success and update summary text
|
|
|
+ self.set_summary_text_for_task(task_id, result.strip())
|
|
|
else:
|
|
|
- update_sql = f"""
|
|
|
- update publish_single_video_source
|
|
|
- set extract_status = %s
|
|
|
- where audit_video_id = %s and extract_status = %s;
|
|
|
- """
|
|
|
- affected_rows = self.db_client.save(
|
|
|
- query=update_sql,
|
|
|
- params=(
|
|
|
- const.EXTRACT_FAIL_STATUS,
|
|
|
- audit_video_id,
|
|
|
- const.EXTRACT_PROCESSING_STATUS
|
|
|
- )
|
|
|
+ # set as fail
|
|
|
+ self.update_task_status(
|
|
|
+ task_id, const.SUMMARY_LOCK, const.SUMMARY_FAIL_STATUS
|
|
|
)
|
|
|
- print(affected_rows)
|
|
|
except Exception as e:
|
|
|
print(e)
|
|
|
+ print(traceback.format_exc())
|
|
|
# set as fail
|
|
|
- update_sql = f"""
|
|
|
- update publish_single_video_source
|
|
|
- set extract_status = %s
|
|
|
- where audit_video_id = %s and extract_status = %s;
|
|
|
- """
|
|
|
- self.db_client.save(
|
|
|
- query=update_sql,
|
|
|
- params=(
|
|
|
- const.EXTRACT_FAIL_STATUS,
|
|
|
- audit_video_id,
|
|
|
- const.EXTRACT_PROCESSING_STATUS
|
|
|
- )
|
|
|
+ self.update_task_status(
|
|
|
+ task_id, const.SUMMARY_LOCK, const.SUMMARY_FAIL_STATUS
|
|
|
)
|
|
|
|
|
|
+ def set_summary_text_for_task(self, task_id, text):
|
|
|
+ """
|
|
|
+ successfully get summary text and update summary text to database
|
|
|
+ """
|
|
|
+ update_sql = f"""
|
|
|
+ update video_content_understanding
|
|
|
+ set summary_status = %s, summary_text = %s, status_update_timestamp = %s
|
|
|
+ where id = %s and summary_status = %s;
|
|
|
+ """
|
|
|
+ affected_rows = self.db_client.save(
|
|
|
+ query=update_sql,
|
|
|
+ params=(
|
|
|
+ const.SUMMARY_SUCCESS_STATUS,
|
|
|
+ text,
|
|
|
+ int(time.time()),
|
|
|
+ task_id,
|
|
|
+ const.SUMMARY_LOCK,
|
|
|
+ ),
|
|
|
+ )
|
|
|
+ return affected_rows
|
|
|
+
|
|
|
+ def update_task_status(self, task_id, ori_status, new_status):
|
|
|
+ """
|
|
|
+ 修改任务状态
|
|
|
+ """
|
|
|
+ update_sql = f"""
|
|
|
+ update video_content_understanding
|
|
|
+ set summary_status = %s, status_update_timestamp = %s
|
|
|
+ where id = %s and summary_status = %s;
|
|
|
+ """
|
|
|
+ update_rows = self.db_client.save(
|
|
|
+ update_sql, (new_status, int(time.time()), task_id, ori_status)
|
|
|
+ )
|
|
|
+ return update_rows
|
|
|
+
|
|
|
def deal(self):
|
|
|
"""
|
|
|
- 开始处理任务
|
|
|
+ entrance function for this class
|
|
|
"""
|
|
|
+ # first of all rollback tasks which have been locked for a long time
|
|
|
+ rollback_rows = self.rollback_lock_tasks()
|
|
|
+ print("rollback_lock_tasks: {}".format(rollback_rows))
|
|
|
+
|
|
|
+ # get task list
|
|
|
task_list = self.get_task_list()
|
|
|
- for task in tqdm(task_list):
|
|
|
+ for task in tqdm(task_list, desc="handle each task"):
|
|
|
try:
|
|
|
- self.process_each_task(task)
|
|
|
+ self.handle_task_execution(task=task)
|
|
|
except Exception as e:
|
|
|
- print(e)
|
|
|
- continue
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
+ print("error: {}".format(e))
|
|
|
+ print(traceback.format_exc())
|