"""
@author: luojunhui
@tool: pycharm && deepseek
"""

import json
import time
import traceback

from applications import log
from applications.db import DatabaseConnector
from applications.utils import download_sph_video
from applications.utils import str_to_md5
from applications.utils import upload_to_oss
from config import long_articles_config
from coldStartTasks.crawler.channels import get_channel_account_videos

# Value written to the `source_account` column for every crawled video.
NO_SOURCE_ACCOUNT = 0


class CrawlerChannelAccountVideos:
    """
    Crawl the videos of a channel account, upload them to OSS and record
    them in the `publish_single_video_source` table.
    """

    def __init__(self) -> None:
        """
        Open the database connection and reset the success counter.
        """
        self.db_client = DatabaseConnector(db_config=long_articles_config)
        self.db_client.connect()
        # Number of videos whose insert completed without raising.
        self.success_crawler_video_count = 0

    def get_channel_account_list(self):
        """
        Get channel account list from database.

        Not implemented yet: currently always returns None.
        """
        return

    def crawler_each_account(
        self, channel_account_id: str, channel_account_name: str
    ) -> None:
        """
        Fetch the videos of one channel account and store them.

        For each processed video the file is downloaded and decrypted,
        uploaded to OSS, and a row is inserted into
        `publish_single_video_source`.

        :param channel_account_id: id passed to `get_channel_account_videos`
            and stored in the `out_account_id` column.
        :param channel_account_name: only used in the failure message that is
            printed when the response code is not 200; the stored account name
            comes from the video's own `nickname` field.
        :return: None
        :raises KeyError, IndexError: if the response or a video entry lacks
            one of the fields read before the download step (these lookups
            are outside the try block).
        """
        response = get_channel_account_videos(channel_account_id)
        if response['ret'] != 200:
            print(f"crawler channel account {channel_account_name} videos failed")
            return

        video_list = response['data']['object']
        # NOTE(review): only the first video of the response is processed
        # (`[:1]`) -- confirm whether this cap is intentional.
        for video in video_list[:1]:
            video_id = str(video['id'])
            account_name = video['nickname']
            object_desc = video['objectDesc']
            publish_timestamp = video['createtime']
            title = object_desc['description']
            media = object_desc['media'][0]
            url = media['Url']
            decode_key = media['decodeKey']
            url_token = media['urlToken']
            # The downloadable address is the base url followed by its token.
            download_url = url + url_token

            # The save step stays inside this try on purpose: anything that
            # escapes it (including a failure of the logging call itself) is
            # reported here instead of aborting the crawl.
            try:
                decrypt_path = download_sph_video(
                    download_url=download_url, key=decode_key
                )
                oss_path = upload_to_oss(decrypt_path)
                self._save_each_video(
                    video_id=video_id,
                    title=title,
                    channel_account_id=channel_account_id,
                    account_name=account_name,
                    oss_path=oss_path,
                    publish_timestamp=publish_timestamp,
                )
            except Exception as e:
                print("download video error:", e)

    def _save_each_video(
        self,
        video_id: str,
        title,
        channel_account_id: str,
        account_name,
        oss_path,
        publish_timestamp,
    ) -> None:
        """
        Insert one crawled video into `publish_single_video_source`.

        On success `success_crawler_video_count` is incremented; on failure
        the error and traceback are sent to `log` and nothing is raised from
        the insert itself.
        """
        insert_sql = """
            insert into publish_single_video_source
            (content_trace_id, article_title, out_account_id, out_account_name, video_oss_path, publish_timestamp, crawler_timestamp, url_unique_md5, platform, source_account)
            values
            (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
        """
        try:
            self.db_client.save(
                query=insert_sql,
                params=(
                    f"video{str_to_md5(video_id)}",
                    title,
                    channel_account_id,
                    account_name,
                    oss_path,
                    publish_timestamp,
                    int(time.time()),
                    # NOTE(review): the `url_unique_md5` column receives the
                    # raw video id, not a hash -- confirm this is intended.
                    video_id,
                    "sph",
                    NO_SOURCE_ACCOUNT,
                ),
            )
            self.success_crawler_video_count += 1
        except Exception as e:
            # NOTE(review): the task label says "baidu_video_crawler" although
            # rows are stored with platform "sph" -- confirm the label.
            log(
                task="baidu_video_crawler",
                function="save_each_video",
                message="save video failed",
                data={
                    "error": str(e),
                    "traceback": traceback.format_exc(),
                    "video_id": video_id,
                    "oss_path": oss_path,
                },
            )