123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- """
- @author: luojunhui
- """
- from typing import Dict, List
- from applications.const import new_content_id_task_const
- async def whether_downloaded_videos_exists(content_id, article_crawler_video_table, db_client):
- """
- 判断该文章是否存在历史匹配视频
- :param content_id:
- :param article_crawler_video_table: 爬虫表
- :param db_client
- :return:
- """
- sql = f"""
- SELECT id
- FROM {article_crawler_video_table}
- WHERE content_id = '{content_id}'
- AND download_status = {new_content_id_task_const.VIDEO_DOWNLOAD_SUCCESS_STATUS}
- AND is_illegal = {new_content_id_task_const.VIDEO_SAFE};
- """
- res_tuple = await db_client.async_select(sql)
- if len(res_tuple) >= new_content_id_task_const.MIN_MATCH_VIDEO_NUM:
- return True
- else:
- return False
- async def get_downloaded_videos(content_id, article_crawler_video_table, db_client) -> List[Dict]:
- """
- 获取已下载的视频
- :return:
- """
- sql = f"""
- SELECT platform, play_count, like_count, video_oss_path, cover_oss_path, user_id
- FROM {article_crawler_video_table}
- WHERE content_id = '{content_id}'
- AND download_status = {new_content_id_task_const.VIDEO_DOWNLOAD_SUCCESS_STATUS}
- AND is_illegal = {new_content_id_task_const.VIDEO_SAFE}
- ORDER BY score DESC;
- """
- res_tuple = await db_client.async_select(sql)
- return [
- {
- "platform": i[0],
- "play_count": i[1],
- "like_count": i[2],
- "video_oss_path": i[3],
- "cover_oss_path": i[4],
- "uid": i[5]
- }
- for i in res_tuple
- ]
- async def update_crawler_table_with_exist_content_id(root_content_id, content_id, trace_id, article_crawler_video_table, db_client):
- """
- 用root_content_id 查询出已经下载过的视频信息,用new_content_id更新
- """
- select_sql = f"""
- SELECT out_video_id, platform, video_title, play_count, like_count, share_count, publish_time, crawler_time,
- duration, video_url, cover_url, download_status, video_oss_path, cover_oss_path, user_id, score, is_illegal
- FROM {article_crawler_video_table}
- WHERE content_id = '{root_content_id}'
- AND download_status = {new_content_id_task_const.VIDEO_DOWNLOAD_SUCCESS_STATUS}
- AND is_illegal = {new_content_id_task_const.VIDEO_SAFE};
- """
- res_tuple = await db_client.async_select(select_sql)
- insert_list = [(trace_id, content_id) + row for row in res_tuple]
- insert_sql = f"""
- INSERT INTO {article_crawler_video_table}
- (trace_id, content_id, out_video_id, platform, video_title, play_count, like_count, share_count, publish_time, crawler_time,
- duration, video_url, cover_url, download_status, video_oss_path, cover_oss_path, user_id, score, is_illegal)
- VALUES
- (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
- """
- await db_client.async_insert_many(sql=insert_sql, params_list=insert_list)
|