|
@@ -3,13 +3,13 @@
|
|
"""
|
|
"""
|
|
import asyncio
|
|
import asyncio
|
|
|
|
|
|
-from applications.static.config import db_article
|
|
|
|
|
|
+from static.config import db_article, db_video
|
|
from applications.schedule import search_videos
|
|
from applications.schedule import search_videos
|
|
from applications.functions.log import logging
|
|
from applications.functions.log import logging
|
|
-from applications.static.config import spider_coroutines
|
|
|
|
|
|
+from static.config import spider_coroutines
|
|
|
|
|
|
|
|
|
|
-class ProcessDeal(object):
|
|
|
|
|
|
+class MatchTask1(object):
|
|
"""
|
|
"""
|
|
定时执行任务
|
|
定时执行任务
|
|
"""
|
|
"""
|
|
@@ -28,7 +28,7 @@ class ProcessDeal(object):
|
|
select_sql1 = f"""
|
|
select_sql1 = f"""
|
|
SELECT DISTINCT (content_id)
|
|
SELECT DISTINCT (content_id)
|
|
FROM {db_article}
|
|
FROM {db_article}
|
|
- WHERE content_status = 0 and process_times <= 5
|
|
|
|
|
|
+ WHERE content_status = 0 and process_times <= 3
|
|
ORDER BY request_time_stamp
|
|
ORDER BY request_time_stamp
|
|
ASC
|
|
ASC
|
|
LIMIT {spider_coroutines};
|
|
LIMIT {spider_coroutines};
|
|
@@ -42,11 +42,10 @@ class ProcessDeal(object):
|
|
select_sql = f"""
|
|
select_sql = f"""
|
|
SELECT trace_id, content_id, gh_id, article_title, article_text, content_status, process_times
|
|
SELECT trace_id, content_id, gh_id, article_title, article_text, content_status, process_times
|
|
FROM {db_article}
|
|
FROM {db_article}
|
|
- WHERE content_id in {content_ids_tuple} and process_times <= 5
|
|
|
|
|
|
+ WHERE content_id in {content_ids_tuple} and process_times <= 3
|
|
ORDER BY request_time_stamp
|
|
ORDER BY request_time_stamp
|
|
ASC;
|
|
ASC;
|
|
"""
|
|
"""
|
|
- print(select_sql)
|
|
|
|
task_list = await self.mysql_client.async_select(sql=select_sql)
|
|
task_list = await self.mysql_client.async_select(sql=select_sql)
|
|
task_obj_list = [
|
|
task_obj_list = [
|
|
{
|
|
{
|
|
@@ -68,26 +67,21 @@ class ProcessDeal(object):
|
|
else:
|
|
else:
|
|
return []
|
|
return []
|
|
|
|
|
|
- async def get_history_contents(self, content_id):
|
|
|
|
|
|
+ async def get_history_videos(self, content_id):
|
|
"""
|
|
"""
|
|
- check whether the content id exists
|
|
|
|
- :return: trace_id or None
|
|
|
|
|
|
+ check whether the contents videos exists
|
|
|
|
+ :param content_id:
|
|
|
|
+ :return:
|
|
"""
|
|
"""
|
|
select_sql = f"""
|
|
select_sql = f"""
|
|
- SELECT trace_id, content_status
|
|
|
|
- FROM {db_article}
|
|
|
|
- WHERE content_id = '{content_id}'
|
|
|
|
- ORDER BY id DESC;
|
|
|
|
- """
|
|
|
|
- result = await self.mysql_client.async_select(select_sql)
|
|
|
|
- if result:
|
|
|
|
- for item in result:
|
|
|
|
- trace_id, content_status = item
|
|
|
|
- if content_status == 2:
|
|
|
|
- return trace_id
|
|
|
|
- else:
|
|
|
|
- continue
|
|
|
|
- return None
|
|
|
|
|
|
+ SELECT video_id
|
|
|
|
+ FROM {db_video}
|
|
|
|
+ where content_id = '{content_id}' and video_status = 1 order by request_time DESC;
|
|
|
|
+ """
|
|
|
|
+ content_videos = await self.mysql_client.async_select(select_sql)
|
|
|
|
+ videos = [vid for vid in content_videos]
|
|
|
|
+ if len(videos) >= 3:
|
|
|
|
+ return videos
|
|
else:
|
|
else:
|
|
return None
|
|
return None
|
|
|
|
|
|
@@ -113,70 +107,48 @@ class ProcessDeal(object):
|
|
else:
|
|
else:
|
|
return True
|
|
return True
|
|
|
|
|
|
- async def insert_history_contents_videos(self, history_trace_id, params):
|
|
|
|
|
|
+ async def use_exists_contents_videos(self, video_id_list, params):
|
|
"""
|
|
"""
|
|
- 插入历史视频id
|
|
|
|
|
|
+ 使用已经存在的视频id
|
|
:return:
|
|
:return:
|
|
"""
|
|
"""
|
|
|
|
+ trace_id = params['trace_id']
|
|
|
|
+ content_id = params['content_id']
|
|
select_sql = f"""
|
|
select_sql = f"""
|
|
- SELECT kimi_title, recall_video_id1, recall_video_id2, recall_video_id3
|
|
|
|
|
|
+ SELECT kimi_title
|
|
FROM {db_article}
|
|
FROM {db_article}
|
|
- WHERE trace_id = '{history_trace_id}';
|
|
|
|
|
|
+ WHERE content_id = '{content_id}' and kimi_title is not null limit 1;
|
|
"""
|
|
"""
|
|
info = await self.mysql_client.async_select(sql=select_sql)
|
|
info = await self.mysql_client.async_select(sql=select_sql)
|
|
- kimi_title, vid1, vid2, vid3 = info[0]
|
|
|
|
|
|
+ kimi_title = info[0]
|
|
update_sql = f"""
|
|
update_sql = f"""
|
|
- UPDATE {db_article}
|
|
|
|
- SET
|
|
|
|
- kimi_title=%s,
|
|
|
|
- recall_video_id1=%s,
|
|
|
|
- recall_video_id2=%s,
|
|
|
|
- recall_video_id3=%s,
|
|
|
|
- content_status=%s,
|
|
|
|
- process_times = %s
|
|
|
|
- WHERE trace_id = %s
|
|
|
|
|
|
+ UPDATE {db_article}
|
|
|
|
+ SET
|
|
|
|
+ kimi_title=%s,
|
|
|
|
+ recall_video_id1=%s,
|
|
|
|
+ recall_video_id2=%s,
|
|
|
|
+ recall_video_id3=%s,
|
|
|
|
+ content_status=%s,
|
|
|
|
+ process_times = %s
|
|
|
|
+ WHERE trace_id = %s
|
|
"""
|
|
"""
|
|
|
|
+ vid1, vid2, vid3 = video_id_list[0], video_id_list[1], video_id_list[2]
|
|
await self.mysql_client.async_insert(
|
|
await self.mysql_client.async_insert(
|
|
sql=update_sql,
|
|
sql=update_sql,
|
|
params=(
|
|
params=(
|
|
kimi_title,
|
|
kimi_title,
|
|
- vid1,
|
|
|
|
|
|
+ video_id_list[0],
|
|
"NULL" if vid2 is None else vid2,
|
|
"NULL" if vid2 is None else vid2,
|
|
"NULL" if vid3 is None else vid3,
|
|
"NULL" if vid3 is None else vid3,
|
|
2,
|
|
2,
|
|
int(params['process_times']) + 1,
|
|
int(params['process_times']) + 1,
|
|
- params['trace_id']
|
|
|
|
|
|
+ trace_id
|
|
)
|
|
)
|
|
)
|
|
)
|
|
logging(
|
|
logging(
|
|
code="9002",
|
|
code="9002",
|
|
- info="已从历史文章更新,历史id: {}".format(history_trace_id),
|
|
|
|
- trace_id=params['trace_id']
|
|
|
|
- )
|
|
|
|
-
|
|
|
|
- async def process_video_id(self, title, trace_id, process_times):
|
|
|
|
- """
|
|
|
|
- 如果video_id在标题中,则做特殊处理
|
|
|
|
- :return:
|
|
|
|
- """
|
|
|
|
- video_id = title.split("video_id=")[-1]
|
|
|
|
- update_sql = f"""
|
|
|
|
- UPDATE
|
|
|
|
- {db_article}
|
|
|
|
- SET
|
|
|
|
- recall_video_id1 = %s,
|
|
|
|
- content_status = %s,
|
|
|
|
- process_times = %s
|
|
|
|
- WHERE
|
|
|
|
- trace_id = %s;"""
|
|
|
|
- await self.mysql_client.async_insert(
|
|
|
|
- sql=update_sql,
|
|
|
|
- params=(
|
|
|
|
- video_id,
|
|
|
|
- 2,
|
|
|
|
- {int(process_times) + 1},
|
|
|
|
- trace_id
|
|
|
|
- )
|
|
|
|
|
|
+ info="已从历史文章更新,文章id: {}".format(content_id),
|
|
|
|
+ trace_id=trace_id
|
|
)
|
|
)
|
|
|
|
|
|
async def start_process(self, params):
|
|
async def start_process(self, params):
|
|
@@ -200,89 +172,88 @@ class ProcessDeal(object):
|
|
)
|
|
)
|
|
)
|
|
)
|
|
try:
|
|
try:
|
|
- # 判断标题中是否包含video_id
|
|
|
|
- if "video_id=" in params['title']:
|
|
|
|
|
|
+ video_count = await search_videos(
|
|
|
|
+ params={
|
|
|
|
+ "title": params['title'],
|
|
|
|
+ "content": params['text'],
|
|
|
|
+ "trace_id": params['trace_id'],
|
|
|
|
+ "content_id": params['content_id']
|
|
|
|
+ },
|
|
|
|
+ trace_id=params['trace_id'],
|
|
|
|
+ gh_id=params['gh_id'],
|
|
|
|
+ mysql_client=self.mysql_client
|
|
|
|
+ )
|
|
|
|
+ select_sql = f"""
|
|
|
|
+ SELECT video_id
|
|
|
|
+ FROM {db_video}
|
|
|
|
+ WHERE content_id = '{params['content_id']}'
|
|
|
|
+ """
|
|
|
|
+ result = await self.mysql_client.async_select(sql=select_sql)
|
|
|
|
+ vid1, vid2, vid3 = result[0], result[1], result[2]
|
|
|
|
+ if vid1 or vid2 or vid3:
|
|
|
|
+ update_sql2 = f"""
|
|
|
|
+ UPDATE {db_article}
|
|
|
|
+ SET
|
|
|
|
+ recall_video_id1 = %s,
|
|
|
|
+ recall_video_id2 = %s,
|
|
|
|
+ recall_video_id3 = %s,
|
|
|
|
+ content_status = %s,
|
|
|
|
+ process_times = %s
|
|
|
|
+ WHERE trace_id = %s;
|
|
|
|
+ """
|
|
|
|
+ await self.mysql_client.async_insert(
|
|
|
|
+ sql=update_sql2,
|
|
|
|
+ params=(
|
|
|
|
+ vid1 if vid1 else "NULL",
|
|
|
|
+ vid2 if vid2 else "NULL",
|
|
|
|
+ vid3 if vid3 else "NULL",
|
|
|
|
+ 2,
|
|
|
|
+ {int(params['process_times']) + 1},
|
|
|
|
+ params['trace_id']
|
|
|
|
+ )
|
|
|
|
+ )
|
|
logging(
|
|
logging(
|
|
- code="9006",
|
|
|
|
- info="视频生成文本测试",
|
|
|
|
|
|
+ code="9008",
|
|
|
|
+ info="视频搜索成功, 状态修改为2",
|
|
trace_id=params['trace_id']
|
|
trace_id=params['trace_id']
|
|
)
|
|
)
|
|
- await self.process_video_id(
|
|
|
|
- title=params['title'],
|
|
|
|
- trace_id=params['trace_id'],
|
|
|
|
- process_times=params['process_times']
|
|
|
|
- )
|
|
|
|
else:
|
|
else:
|
|
- await search_videos(
|
|
|
|
- params={"title": params['title'], "content": params['text'], "trace_id": params['trace_id']},
|
|
|
|
- trace_id=params['trace_id'],
|
|
|
|
- gh_id=params['gh_id'],
|
|
|
|
- mysql_client=self.mysql_client
|
|
|
|
- )
|
|
|
|
- # 执行完成之后,判断是否存在视频id
|
|
|
|
- select_sql = f"""
|
|
|
|
- SELECT recall_video_id1, recall_video_id2, recall_video_id3
|
|
|
|
- FROM {db_article}
|
|
|
|
- WHERE trace_id = '{params["trace_id"]}';
|
|
|
|
- """
|
|
|
|
- result = await self.mysql_client.async_select(sql=select_sql)
|
|
|
|
- vid1, vid2, vid3 = result[0]
|
|
|
|
- if vid1 or vid2 or vid3:
|
|
|
|
- update_sql2 = f"""
|
|
|
|
|
|
+ if int(params['process_times']) < 3:
|
|
|
|
+ update_sql3 = f"""
|
|
UPDATE {db_article}
|
|
UPDATE {db_article}
|
|
SET
|
|
SET
|
|
content_status = %s,
|
|
content_status = %s,
|
|
process_times = %s
|
|
process_times = %s
|
|
- WHERE trace_id = %s;
|
|
|
|
- """
|
|
|
|
|
|
+ WHERE trace_id = %s;
|
|
|
|
+ """
|
|
await self.mysql_client.async_insert(
|
|
await self.mysql_client.async_insert(
|
|
- sql=update_sql2,
|
|
|
|
- params=(
|
|
|
|
- 2, {int(params['process_times']) + 1}, params['trace_id']
|
|
|
|
- )
|
|
|
|
|
|
+ sql=update_sql3,
|
|
|
|
+ params=(0, int(params['process_times']) + 1, params['trace_id'])
|
|
)
|
|
)
|
|
logging(
|
|
logging(
|
|
- code="9008",
|
|
|
|
- info="视频搜索成功, 状态修改为2",
|
|
|
|
|
|
+ code="9018",
|
|
|
|
+ info="视频搜索失败,回退状态为0",
|
|
trace_id=params['trace_id']
|
|
trace_id=params['trace_id']
|
|
)
|
|
)
|
|
else:
|
|
else:
|
|
- if int(params['process_times']) < 5:
|
|
|
|
- update_sql3 = f"""
|
|
|
|
- UPDATE {db_article}
|
|
|
|
- SET
|
|
|
|
- content_status = %s,
|
|
|
|
- process_times = %s
|
|
|
|
- WHERE trace_id = %s;
|
|
|
|
- """
|
|
|
|
- await self.mysql_client.async_insert(
|
|
|
|
- sql=update_sql3,
|
|
|
|
- params=(0, int(params['process_times']) + 1, params['trace_id'])
|
|
|
|
- )
|
|
|
|
- logging(
|
|
|
|
- code="9018",
|
|
|
|
- info="视频搜索失败,回退状态为0",
|
|
|
|
- trace_id=params['trace_id']
|
|
|
|
- )
|
|
|
|
- else:
|
|
|
|
- update_sql3 = f"""
|
|
|
|
- UPDATE {db_article}
|
|
|
|
- SET
|
|
|
|
- content_status = %s,
|
|
|
|
- process_times = %s
|
|
|
|
- WHERE trace_id = %s;
|
|
|
|
- """
|
|
|
|
- await self.mysql_client.async_insert(
|
|
|
|
- sql=update_sql3,
|
|
|
|
- params=(3, int(params['process_times']) + 1, params['trace_id'])
|
|
|
|
- )
|
|
|
|
- logging(
|
|
|
|
- code="9019",
|
|
|
|
- info="视频多次搜索失败,状态修改为3",
|
|
|
|
- trace_id=params['trace_id']
|
|
|
|
- )
|
|
|
|
|
|
+ update_sql3 = f"""
|
|
|
|
+ UPDATE {db_article}
|
|
|
|
+ SET
|
|
|
|
+ content_status = %s,
|
|
|
|
+ process_times = %s
|
|
|
|
+ WHERE trace_id = %s;
|
|
|
|
+ """
|
|
|
|
+ await self.mysql_client.async_insert(
|
|
|
|
+ sql=update_sql3,
|
|
|
|
+ params=(3, int(params['process_times']) + 1, params['trace_id'])
|
|
|
|
+ )
|
|
|
|
+ logging(
|
|
|
|
+ code="9019",
|
|
|
|
+ info="视频多次搜索失败,状态修改为3",
|
|
|
|
+ trace_id=params['trace_id']
|
|
|
|
+ )
|
|
except Exception as e:
|
|
except Exception as e:
|
|
- if int(params['process_times']) < 5:
|
|
|
|
|
|
+ if int(params['process_times']) < 3:
|
|
logging(
|
|
logging(
|
|
code="9018",
|
|
code="9018",
|
|
info="{}异常错误:{}, 回退状态为0".format(params['trace_id'], e),
|
|
info="{}异常错误:{}, 回退状态为0".format(params['trace_id'], e),
|
|
@@ -290,7 +261,7 @@ class ProcessDeal(object):
|
|
)
|
|
)
|
|
update_sql4 = f"""
|
|
update_sql4 = f"""
|
|
UPDATE {db_article}
|
|
UPDATE {db_article}
|
|
- SET
|
|
|
|
|
|
+ SET
|
|
content_status = %s,
|
|
content_status = %s,
|
|
process_times = %s
|
|
process_times = %s
|
|
WHERE trace_id = %s;
|
|
WHERE trace_id = %s;
|
|
@@ -307,7 +278,7 @@ class ProcessDeal(object):
|
|
)
|
|
)
|
|
update_sql4 = f"""
|
|
update_sql4 = f"""
|
|
UPDATE {db_article}
|
|
UPDATE {db_article}
|
|
- SET
|
|
|
|
|
|
+ SET
|
|
content_status = %s,
|
|
content_status = %s,
|
|
process_times = %s
|
|
process_times = %s
|
|
WHERE trace_id = %s;
|
|
WHERE trace_id = %s;
|
|
@@ -325,16 +296,16 @@ class ProcessDeal(object):
|
|
"""
|
|
"""
|
|
content_id = params['content_id']
|
|
content_id = params['content_id']
|
|
trace_id = params['trace_id']
|
|
trace_id = params['trace_id']
|
|
- # 判断该文章是否已经生成了
|
|
|
|
- history_trace_id = await self.get_history_contents(content_id)
|
|
|
|
- if history_trace_id:
|
|
|
|
|
|
+ # 判断该篇文章是否存在未下架的视频,且判断是否有3条, 如果没有三条,则启动新抓取任务,后续优化点
|
|
|
|
+ video_id_list = await self.get_history_videos(content_id=content_id)
|
|
|
|
+ if video_id_list:
|
|
# 说明已经存在了结果, 将该条记录下的video_id拿出来
|
|
# 说明已经存在了结果, 将该条记录下的video_id拿出来
|
|
logging(
|
|
logging(
|
|
code="9001",
|
|
code="9001",
|
|
info="存在历史文章",
|
|
info="存在历史文章",
|
|
trace_id=trace_id
|
|
trace_id=trace_id
|
|
)
|
|
)
|
|
- await self.insert_history_contents_videos(history_trace_id, params)
|
|
|
|
|
|
+ # await self.use_exists_contents_videos(video_id_list=video_id_list, params=params)
|
|
else:
|
|
else:
|
|
flag = await self.judge_content_processing(content_id)
|
|
flag = await self.judge_content_processing(content_id)
|
|
if flag:
|
|
if flag:
|