|
@@ -8,6 +8,7 @@ from utils import data_check, get_feature_data
|
|
|
from config import set_config
|
|
|
from log import Log
|
|
|
import ODPSQueryUtil
|
|
|
+from ReadXlsxFile import getVideoInfoInXlxs
|
|
|
# Module-level configuration object built by config.set_config(); the OSS
# credentials are read from it further down (config_.OSS_CONFIG).
config_ = set_config()
|
|
|
# Module-level instance of the project's Log class (imported from log).
log_ = Log()
|
|
|
# Column names for the video records: id, title and storage path.
# NOTE(review): not referenced in the visible part of this file — presumably
# the columns requested when loading feature data; confirm against callers.
features = ['videoid', 'title', 'video_path']
|
|
@@ -21,14 +22,10 @@ def download_video_from_oss(video_id, video_path, download_folder):
|
|
|
download_folder = f"{download_folder}_{pid}"
|
|
|
if not os.path.exists(download_folder):
|
|
|
os.makedirs(download_folder)
|
|
|
- video_local_dir = os.path.join(download_folder, video_id)
|
|
|
+ video_local_dir = os.path.join(download_folder, str(video_id))
|
|
|
os.makedirs(video_local_dir)
|
|
|
video_filename = video_path.split('/')[-1]
|
|
|
video_local_path = os.path.join(video_local_dir, video_filename)
|
|
|
- # 判断文件不存在
|
|
|
- if os.path.exists(video_local_path):
|
|
|
- print(f"{video_id} already exists!")
|
|
|
- return
|
|
|
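# An Alibaba Cloud account AccessKey has access to every API, which is high risk. It is strongly recommended to create a RAM user and use it for API access and routine operations; log in to the RAM console to create one.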
|
|
|
# auth = oss2.Auth(access_key_id=config_.ODPS_CONFIG['ACCESSID'], access_key_secret=config_.ODPS_CONFIG['ACCESSKEY'])
|
|
|
auth = oss2.Auth(access_key_id=config_.OSS_CONFIG['accessKeyId'],
|
|
@@ -105,6 +102,18 @@ def timer_check():
|
|
|
|
|
|
if __name__ == '__main__':
    # timer_check()
    # Batch-download every video listed in the spreadsheet, fanning the
    # downloads out over a pool of worker processes.
    feature_df = getVideoInfoInXlxs('past_videos.xlsx')
    download_folder = 'videos'
    # Pair each id with the path on its own row in a single pass, instead of
    # re-filtering the whole frame once per id (which is quadratic).
    video_id_list = feature_df['videoid'].to_list()
    video_path_list = feature_df['video_path'].to_list()
    pool = multiprocessing.Pool(processes=6)
    pending = []
    for video_id, raw_path in zip(video_id_list, video_path_list):
        # Drop surrounding whitespace, then any spaces embedded in the path.
        video_path = raw_path.strip().replace(' ', '')
        print(video_id, video_path)
        async_result = pool.apply_async(
            func=download_video_from_oss,
            args=(video_id, video_path, download_folder),
        )
        pending.append((video_id, async_result))
    pool.close()
    pool.join()
    # apply_async keeps a worker's exception on its AsyncResult and only
    # re-raises it from get(); without this pass failed downloads would go
    # completely unnoticed.
    failed_ids = []
    for video_id, async_result in pending:
        try:
            async_result.get()
        except Exception as exc:  # best-effort batch: report and carry on
            failed_ids.append(video_id)
            print(f"{video_id} download failed: {exc!r}")
    if failed_ids:
        print(f"{len(failed_ids)} of {len(pending)} downloads failed: {failed_ids}")
|