|
@@ -9,14 +9,19 @@ from tqdm import tqdm
|
|
from pymysql.cursors import DictCursor
|
|
from pymysql.cursors import DictCursor
|
|
|
|
|
|
from applications.api import GoogleAIAPI
|
|
from applications.api import GoogleAIAPI
|
|
|
|
+from applications.const import GoogleVideoUnderstandTaskConst
|
|
from applications.db import DatabaseConnector
|
|
from applications.db import DatabaseConnector
|
|
from config import long_articles_config
|
|
from config import long_articles_config
|
|
from coldStartTasks.ai_pipeline.basic import download_file
|
|
from coldStartTasks.ai_pipeline.basic import download_file
|
|
from coldStartTasks.ai_pipeline.basic import update_task_queue_status
|
|
from coldStartTasks.ai_pipeline.basic import update_task_queue_status
|
|
from coldStartTasks.ai_pipeline.basic import roll_back_lock_tasks
|
|
from coldStartTasks.ai_pipeline.basic import roll_back_lock_tasks
|
|
-from coldStartTasks.ai_pipeline.basic import extract_prompt
|
|
|
|
|
|
+from coldStartTasks.ai_pipeline.basic import extract_best_frame_prompt
|
|
|
|
+from coldStartTasks.ai_pipeline.basic import get_video_cover
|
|
|
|
+from coldStartTasks.ai_pipeline.basic import normalize_time_str
|
|
|
|
|
|
|
|
+const = GoogleVideoUnderstandTaskConst()
|
|
table_name = "long_articles_new_video_cover"
|
|
table_name = "long_articles_new_video_cover"
|
|
|
|
+dir_name = "static"
|
|
POOL_SIZE = 10
|
|
POOL_SIZE = 10
|
|
google_ai = GoogleAIAPI()
|
|
google_ai = GoogleAIAPI()
|
|
|
|
|
|
@@ -30,13 +35,13 @@ class ExtractVideoBestFrame:
|
|
self.db_client = DatabaseConnector(db_config=long_articles_config)
|
|
self.db_client = DatabaseConnector(db_config=long_articles_config)
|
|
self.db_client.connect()
|
|
self.db_client.connect()
|
|
|
|
|
|
- def get_upload_task_list(self, task_num: int = 10) -> list[dict]:
|
|
|
|
|
|
+ def get_upload_task_list(self, task_num: int = POOL_SIZE) -> list[dict]:
|
|
"""
|
|
"""
|
|
get upload task list
|
|
get upload task list
|
|
"""
|
|
"""
|
|
fetch_query = f"""
|
|
fetch_query = f"""
|
|
select id, video_oss_path from {table_name}
|
|
select id, video_oss_path from {table_name}
|
|
- where upload_status = 0 and priority = 1
|
|
|
|
|
|
+ where upload_status = {const.INIT_STATUS} and priority = 1
|
|
limit {task_num};
|
|
limit {task_num};
|
|
"""
|
|
"""
|
|
upload_task_list = self.db_client.fetch(
|
|
upload_task_list = self.db_client.fetch(
|
|
@@ -44,13 +49,13 @@ class ExtractVideoBestFrame:
|
|
)
|
|
)
|
|
return upload_task_list
|
|
return upload_task_list
|
|
|
|
|
|
- def get_extract_task_list(self, task_num: int = 10) -> list[dict]:
|
|
|
|
|
|
+ def get_extract_task_list(self, task_num: int = POOL_SIZE) -> list[dict]:
|
|
"""
|
|
"""
|
|
get extract task list
|
|
get extract task list
|
|
"""
|
|
"""
|
|
fetch_query = f"""
|
|
fetch_query = f"""
|
|
select id, file_name from {table_name}
|
|
select id, file_name from {table_name}
|
|
- where upload_status = 2 and extract_status = 0
|
|
|
|
|
|
+ where upload_status = {const.SUCCESS_STATUS} and extract_status = {const.INIT_STATUS}
|
|
order by file_expire_time
|
|
order by file_expire_time
|
|
limit {task_num};
|
|
limit {task_num};
|
|
"""
|
|
"""
|
|
@@ -59,13 +64,26 @@ class ExtractVideoBestFrame:
|
|
)
|
|
)
|
|
return extract_task_list
|
|
return extract_task_list
|
|
|
|
|
|
|
|
+ def get_cover_task_list(self) -> list[dict]:
|
|
|
|
+ """
|
|
|
|
+ get cover task list
|
|
|
|
+ """
|
|
|
|
+ fetch_query = f"""
|
|
|
|
+ select id, video_oss_path, best_frame_time_ms from {table_name}
|
|
|
|
+ where extract_status = {const.SUCCESS_STATUS} and get_cover_status = {const.INIT_STATUS};
|
|
|
|
+ """
|
|
|
|
+ extract_task_list = self.db_client.fetch(
|
|
|
|
+ query=fetch_query, cursor_type=DictCursor
|
|
|
|
+ )
|
|
|
|
+ return extract_task_list
|
|
|
|
+
|
|
def get_processing_task_pool_size(self) -> int:
|
|
def get_processing_task_pool_size(self) -> int:
|
|
"""
|
|
"""
|
|
get processing task pool size
|
|
get processing task pool size
|
|
"""
|
|
"""
|
|
fetch_query = f"""
|
|
fetch_query = f"""
|
|
select count(1) as pool_size from {table_name}
|
|
select count(1) as pool_size from {table_name}
|
|
- where upload_status = 2 and file_state = 'PROCESSING' and extract_status = 0;
|
|
|
|
|
|
+ where upload_status = {const.SUCCESS_STATUS} and file_state = 'PROCESSING' and extract_status = {const.INIT_STATUS};
|
|
"""
|
|
"""
|
|
fetch_response = self.db_client.fetch(query=fetch_query, cursor_type=DictCursor)
|
|
fetch_response = self.db_client.fetch(query=fetch_query, cursor_type=DictCursor)
|
|
processing_task_pool_size = (
|
|
processing_task_pool_size = (
|
|
@@ -85,13 +103,13 @@ class ExtractVideoBestFrame:
|
|
update_rows = self.db_client.save(
|
|
update_rows = self.db_client.save(
|
|
query=update_query,
|
|
query=update_query,
|
|
params=(
|
|
params=(
|
|
- 2,
|
|
|
|
|
|
+ const.SUCCESS_STATUS,
|
|
datetime.datetime.now(),
|
|
datetime.datetime.now(),
|
|
file_name,
|
|
file_name,
|
|
file_state,
|
|
file_state,
|
|
file_expire_time,
|
|
file_expire_time,
|
|
task_id,
|
|
task_id,
|
|
- 1,
|
|
|
|
|
|
+ const.PROCESSING_STATUS,
|
|
),
|
|
),
|
|
)
|
|
)
|
|
return update_rows
|
|
return update_rows
|
|
@@ -108,12 +126,12 @@ class ExtractVideoBestFrame:
|
|
update_rows = self.db_client.save(
|
|
update_rows = self.db_client.save(
|
|
query=update_query,
|
|
query=update_query,
|
|
params=(
|
|
params=(
|
|
- 2,
|
|
|
|
|
|
+ const.SUCCESS_STATUS,
|
|
datetime.datetime.now(),
|
|
datetime.datetime.now(),
|
|
file_state,
|
|
file_state,
|
|
best_frame_tims_ms,
|
|
best_frame_tims_ms,
|
|
task_id,
|
|
task_id,
|
|
- 1,
|
|
|
|
|
|
+ const.PROCESSING_STATUS,
|
|
),
|
|
),
|
|
)
|
|
)
|
|
return update_rows
|
|
return update_rows
|
|
@@ -125,9 +143,9 @@ class ExtractVideoBestFrame:
|
|
roll_back_lock_tasks_count = roll_back_lock_tasks(
|
|
roll_back_lock_tasks_count = roll_back_lock_tasks(
|
|
db_client=self.db_client,
|
|
db_client=self.db_client,
|
|
task="upload",
|
|
task="upload",
|
|
- init_status=0,
|
|
|
|
- processing_status=1,
|
|
|
|
- max_process_time=3600,
|
|
|
|
|
|
+ init_status=const.INIT_STATUS,
|
|
|
|
+ processing_status=const.PROCESSING_STATUS,
|
|
|
|
+ max_process_time=const.MAX_PROCESSING_TIME,
|
|
)
|
|
)
|
|
print("roll_back_lock_tasks_count", roll_back_lock_tasks_count)
|
|
print("roll_back_lock_tasks_count", roll_back_lock_tasks_count)
|
|
|
|
|
|
@@ -140,8 +158,8 @@ class ExtractVideoBestFrame:
|
|
db_client=self.db_client,
|
|
db_client=self.db_client,
|
|
task_id=task["id"],
|
|
task_id=task["id"],
|
|
task="upload",
|
|
task="upload",
|
|
- ori_status=0,
|
|
|
|
- new_status=1,
|
|
|
|
|
|
+ ori_status=const.INIT_STATUS,
|
|
|
|
+ new_status=const.PROCESSING_STATUS,
|
|
)
|
|
)
|
|
if not lock_status:
|
|
if not lock_status:
|
|
continue
|
|
continue
|
|
@@ -163,8 +181,8 @@ class ExtractVideoBestFrame:
|
|
db_client=self.db_client,
|
|
db_client=self.db_client,
|
|
task_id=task["id"],
|
|
task_id=task["id"],
|
|
task="upload",
|
|
task="upload",
|
|
- ori_status=1,
|
|
|
|
- new_status=99,
|
|
|
|
|
|
+ ori_status=const.PROCESSING_STATUS,
|
|
|
|
+ new_status=const.FAIL_STATUS,
|
|
)
|
|
)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
print(f"download_file error: {e}")
|
|
print(f"download_file error: {e}")
|
|
@@ -172,8 +190,8 @@ class ExtractVideoBestFrame:
|
|
db_client=self.db_client,
|
|
db_client=self.db_client,
|
|
task_id=task["id"],
|
|
task_id=task["id"],
|
|
task="upload",
|
|
task="upload",
|
|
- ori_status=1,
|
|
|
|
- new_status=99,
|
|
|
|
|
|
+ ori_status=const.PROCESSING_STATUS,
|
|
|
|
+ new_status=const.FAIL_STATUS,
|
|
)
|
|
)
|
|
continue
|
|
continue
|
|
|
|
|
|
@@ -185,9 +203,9 @@ class ExtractVideoBestFrame:
|
|
roll_back_lock_tasks_count = roll_back_lock_tasks(
|
|
roll_back_lock_tasks_count = roll_back_lock_tasks(
|
|
db_client=self.db_client,
|
|
db_client=self.db_client,
|
|
task="extract",
|
|
task="extract",
|
|
- init_status=0,
|
|
|
|
- processing_status=1,
|
|
|
|
- max_process_time=3600,
|
|
|
|
|
|
+ init_status=const.INIT_STATUS,
|
|
|
|
+ processing_status=const.PROCESSING_STATUS,
|
|
|
|
+ max_process_time=const.MAX_PROCESSING_TIME,
|
|
)
|
|
)
|
|
print("roll_back_lock_tasks_count", roll_back_lock_tasks_count)
|
|
print("roll_back_lock_tasks_count", roll_back_lock_tasks_count)
|
|
|
|
|
|
@@ -199,18 +217,17 @@ class ExtractVideoBestFrame:
|
|
db_client=self.db_client,
|
|
db_client=self.db_client,
|
|
task_id=task["id"],
|
|
task_id=task["id"],
|
|
task="extract",
|
|
task="extract",
|
|
- ori_status=0,
|
|
|
|
- new_status=1,
|
|
|
|
|
|
+ ori_status=const.INIT_STATUS,
|
|
|
|
+ new_status=const.PROCESSING_STATUS,
|
|
)
|
|
)
|
|
if not lock_status:
|
|
if not lock_status:
|
|
continue
|
|
continue
|
|
|
|
|
|
file_name = task["file_name"]
|
|
file_name = task["file_name"]
|
|
- video_local_path = "static/{}.mp4".format(task["id"])
|
|
|
|
|
|
+ video_local_path = os.path.join(dir_name, "{}.mp4".format(task["id"]))
|
|
try:
|
|
try:
|
|
google_file = google_ai.get_google_file(file_name)
|
|
google_file = google_ai.get_google_file(file_name)
|
|
state = google_file.state.name
|
|
state = google_file.state.name
|
|
-
|
|
|
|
match state:
|
|
match state:
|
|
case "PROCESSING":
|
|
case "PROCESSING":
|
|
# google is still processing this video
|
|
# google is still processing this video
|
|
@@ -218,8 +235,8 @@ class ExtractVideoBestFrame:
|
|
db_client=self.db_client,
|
|
db_client=self.db_client,
|
|
task_id=task["id"],
|
|
task_id=task["id"],
|
|
task="extract",
|
|
task="extract",
|
|
- ori_status=1,
|
|
|
|
- new_status=0,
|
|
|
|
|
|
+ ori_status=const.PROCESSING_STATUS,
|
|
|
|
+ new_status=const.INIT_STATUS,
|
|
)
|
|
)
|
|
print("this video is still processing")
|
|
print("this video is still processing")
|
|
|
|
|
|
@@ -234,10 +251,10 @@ class ExtractVideoBestFrame:
|
|
query=update_query,
|
|
query=update_query,
|
|
params=(
|
|
params=(
|
|
"FAILED",
|
|
"FAILED",
|
|
- 99,
|
|
|
|
|
|
+ const.FAIL_STATUS,
|
|
datetime.datetime.now(),
|
|
datetime.datetime.now(),
|
|
task["id"],
|
|
task["id"],
|
|
- 1,
|
|
|
|
|
|
+ const.PROCESSING_STATUS,
|
|
),
|
|
),
|
|
)
|
|
)
|
|
|
|
|
|
@@ -245,7 +262,8 @@ class ExtractVideoBestFrame:
|
|
# video process successfully
|
|
# video process successfully
|
|
try:
|
|
try:
|
|
best_frame_tims_ms = google_ai.fetch_info_from_google_ai(
|
|
best_frame_tims_ms = google_ai.fetch_info_from_google_ai(
|
|
- prompt=extract_prompt, video_file=google_file
|
|
|
|
|
|
+ prompt=extract_best_frame_prompt(),
|
|
|
|
+ video_file=google_file,
|
|
)
|
|
)
|
|
if best_frame_tims_ms:
|
|
if best_frame_tims_ms:
|
|
self.set_extract_result(
|
|
self.set_extract_result(
|
|
@@ -258,8 +276,8 @@ class ExtractVideoBestFrame:
|
|
db_client=self.db_client,
|
|
db_client=self.db_client,
|
|
task_id=task["id"],
|
|
task_id=task["id"],
|
|
task="extract",
|
|
task="extract",
|
|
- ori_status=1,
|
|
|
|
- new_status=99,
|
|
|
|
|
|
+ ori_status=const.PROCESSING_STATUS,
|
|
|
|
+ new_status=const.FAIL_STATUS,
|
|
)
|
|
)
|
|
# delete local file and google file
|
|
# delete local file and google file
|
|
if os.path.exists(video_local_path):
|
|
if os.path.exists(video_local_path):
|
|
@@ -272,8 +290,8 @@ class ExtractVideoBestFrame:
|
|
db_client=self.db_client,
|
|
db_client=self.db_client,
|
|
task_id=task["id"],
|
|
task_id=task["id"],
|
|
task="extract",
|
|
task="extract",
|
|
- ori_status=1,
|
|
|
|
- new_status=99,
|
|
|
|
|
|
+ ori_status=const.PROCESSING_STATUS,
|
|
|
|
+ new_status=const.FAIL_STATUS,
|
|
)
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
@@ -282,6 +300,64 @@ class ExtractVideoBestFrame:
|
|
db_client=self.db_client,
|
|
db_client=self.db_client,
|
|
task_id=task["id"],
|
|
task_id=task["id"],
|
|
task="extract",
|
|
task="extract",
|
|
- ori_status=1,
|
|
|
|
- new_status=99,
|
|
|
|
|
|
+ ori_status=const.PROCESSING_STATUS,
|
|
|
|
+ new_status=const.FAIL_STATUS,
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ def get_cover_with_best_frame(self):
|
|
|
|
+ """
|
|
|
|
+ get cover with best frame
|
|
|
|
+ """
|
|
|
|
+ # get task list
|
|
|
|
+ task_list = self.get_cover_task_list()
|
|
|
|
+ for task in tqdm(task_list, desc="extract_cover_with_ffmpeg"):
|
|
|
|
+ # lock task
|
|
|
|
+ lock_status = update_task_queue_status(
|
|
|
|
+ db_client=self.db_client,
|
|
|
|
+ task_id=task["id"],
|
|
|
|
+ task="get_cover",
|
|
|
|
+ ori_status=const.INIT_STATUS,
|
|
|
|
+ new_status=const.PROCESSING_STATUS,
|
|
|
|
+ )
|
|
|
|
+ if not lock_status:
|
|
|
|
+ continue
|
|
|
|
+
|
|
|
|
+ time_str = normalize_time_str(task["best_frame_time_ms"])
|
|
|
|
+ if time_str:
|
|
|
|
+ response = get_video_cover(
|
|
|
|
+ video_oss_path=task["video_oss_path"], time_millisecond_str=time_str
|
|
|
|
+ )
|
|
|
|
+ print(response)
|
|
|
|
+ if response["success"] and response["data"]:
|
|
|
|
+ cover_oss_path = response["data"]
|
|
|
|
+ update_query = f"""
|
|
|
|
+ update {table_name}
|
|
|
|
+ set cover_oss_path = %s, get_cover_status = %s, get_cover_status_ts = %s
|
|
|
|
+ where id = %s and get_cover_status = %s;
|
|
|
|
+ """
|
|
|
|
+ update_rows = self.db_client.save(
|
|
|
|
+ query=update_query,
|
|
|
|
+ params=(
|
|
|
|
+ cover_oss_path,
|
|
|
|
+ const.SUCCESS_STATUS,
|
|
|
|
+ datetime.datetime.now(),
|
|
|
|
+ task["id"],
|
|
|
|
+ const.PROCESSING_STATUS,
|
|
|
|
+ ),
|
|
|
|
+ )
|
|
|
|
+ else:
|
|
|
|
+ update_task_queue_status(
|
|
|
|
+ db_client=self.db_client,
|
|
|
|
+ task_id=task["id"],
|
|
|
|
+ task="get_cover",
|
|
|
|
+ ori_status=const.PROCESSING_STATUS,
|
|
|
|
+ new_status=const.FAIL_STATUS,
|
|
|
|
+ )
|
|
|
|
+ else:
|
|
|
|
+ update_task_queue_status(
|
|
|
|
+ db_client=self.db_client,
|
|
|
|
+ task_id=task["id"],
|
|
|
|
+ task="get_cover",
|
|
|
|
+ ori_status=const.PROCESSING_STATUS,
|
|
|
|
+ new_status=const.FAIL_STATUS,
|
|
)
|
|
)
|