""" @author: luojunhui """ from typing import Dict, List from applications.const import new_content_id_task_const async def whether_downloaded_videos_exists(content_id, article_crawler_video_table, db_client): """ 判断该文章是否存在历史匹配视频 :param content_id: :param article_crawler_video_table: 爬虫表 :param db_client :return: """ sql = f""" SELECT id FROM {article_crawler_video_table} WHERE content_id = '{content_id}' AND download_status = {new_content_id_task_const.VIDEO_DOWNLOAD_SUCCESS_STATUS}; """ res_tuple = await db_client.async_select(sql) if len(res_tuple) >= new_content_id_task_const.MIN_MATCH_VIDEO_NUM: return True else: return False async def get_downloaded_videos(content_id, article_crawler_video_table, db_client) -> List[Dict]: """ 获取已下载的视频 :return: """ sql = f""" SELECT platform, play_count, like_count, video_oss_path, cover_oss_path, user_id FROM {article_crawler_video_table} WHERE content_id = '{content_id}' AND download_status = {new_content_id_task_const.VIDEO_DOWNLOAD_SUCCESS_STATUS} ORDER BY score DESC; """ res_tuple = await db_client.async_select(sql) return [ { "platform": i[0], "play_count": i[1], "like_count": i[2], "video_oss_path": i[3], "cover_oss_path": i[4], "uid": i[5] } for i in res_tuple ] async def update_crawler_table_with_exist_content_id(root_content_id, content_id, trace_id, article_crawler_video_table, db_client): """ 用root_content_id 查询出已经下载过的视频信息,用new_content_id更新 """ select_sql = f""" SELECT out_video_id, platform, video_title, play_count, like_count, share_count, publish_time, crawler_time, duration, video_url, cover_url, download_status, video_oss_path, cover_oss_path, user_id, score, is_illegal FROM {article_crawler_video_table} WHERE content_id = '{root_content_id}' AND download_status = {new_content_id_task_const.VIDEO_DOWNLOAD_SUCCESS_STATUS} AND is_illegal = {new_content_id_task_const.VIDEO_SAFE}; """ res_tuple = await db_client.async_select(select_sql) if res_tuple: insert_list = [(trace_id, content_id) + row for row in res_tuple] insert_sql = f""" INSERT INTO {article_crawler_video_table} (trace_id, content_id, out_video_id, platform, video_title, play_count, like_count, share_count, publish_time, crawler_time, duration, video_url, cover_url, download_status, video_oss_path, cover_oss_path, user_id, score, is_illegal) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s); """ affected_rows = await db_client.async_insert_many(sql=insert_sql, params_list=insert_list) return affected_rows else: return 0