|
@@ -8,10 +8,23 @@ from utils import data_check, get_feature_data
|
|
|
from config import set_config
|
|
|
from log import Log
|
|
|
import ODPSQueryUtil
|
|
|
+from ReadXlsxFile import getVideoInfoInXlxs
|
|
|
+import requests
|
|
|
config_ = set_config()
|
|
|
log_ = Log()
|
|
|
features = ['videoid', 'title', 'video_path']
|
|
|
|
|
|
+cdn_domain = 'http://rescdn.yishihui.com/'
|
|
|
+
|
|
|
+
|
|
|
+def download_file(url, local_path, timeout=30):
+    """Download ``url`` to ``local_path``, streaming the body to disk.
+
+    Args:
+        url: Address of the file to fetch.
+        local_path: Destination file path; its parent directory must exist.
+        timeout: Seconds handed to ``requests.get`` so that a stalled
+            connection cannot block the calling worker indefinitely.
+
+    Returns:
+        True if the server answered 200 and the file was written,
+        False otherwise (the failure message is still printed).
+
+    Raises:
+        requests.RequestException: On connection errors or timeouts.
+        OSError: If the local file cannot be written.
+    """
+    # Stage the data next to the target so an interrupted transfer never
+    # leaves a truncated file under the final name.
+    partial_path = f"{local_path}.part"
+    with requests.get(url, stream=True, timeout=timeout) as response:
+        if response.status_code != 200:
+            print(f"Failed to download {url}")
+            return False
+        try:
+            with open(partial_path, 'wb') as f:
+                # Stream in chunks instead of buffering the whole body in
+                # memory via response.content.
+                for chunk in response.iter_content(chunk_size=1024 * 1024):
+                    f.write(chunk)
+        except Exception:
+            if os.path.exists(partial_path):
+                os.remove(partial_path)
+            raise
+    os.replace(partial_path, local_path)
+    return True
|
|
|
+
|
|
|
|
|
|
def download_video_from_oss(video_id, video_path, download_folder):
|
|
|
"""从oss下载视频"""
|
|
@@ -21,21 +34,14 @@ def download_video_from_oss(video_id, video_path, download_folder):
|
|
|
download_folder = f"{download_folder}_{pid}"
|
|
|
if not os.path.exists(download_folder):
|
|
|
os.makedirs(download_folder)
|
|
|
- video_local_dir = os.path.join(download_folder, video_id)
|
|
|
+ video_local_dir = os.path.join(download_folder, str(video_id))
|
|
|
os.makedirs(video_local_dir)
|
|
|
video_filename = video_path.split('/')[-1]
|
|
|
video_local_path = os.path.join(video_local_dir, video_filename)
|
|
|
- # 阿里云账号AccessKey拥有所有API的访问权限,风险很高。强烈建议您创建并使用RAM用户进行API访问或日常运维,请登录RAM控制台创建RAM用户。
|
|
|
- # auth = oss2.Auth(access_key_id=config_.ODPS_CONFIG['ACCESSID'], access_key_secret=config_.ODPS_CONFIG['ACCESSKEY'])
|
|
|
- auth = oss2.Auth(access_key_id=config_.OSS_CONFIG['accessKeyId'],
|
|
|
- access_key_secret=config_.OSS_CONFIG['accessKeySecret'])
|
|
|
- # Endpoint以杭州为例,其它Region请按实际情况填写。
|
|
|
- bucket = oss2.Bucket(
|
|
|
- auth, endpoint=config_.OSS_CONFIG['endpoint'], bucket_name='art-pubbucket')
|
|
|
- # 下载OSS文件到本地文件。
|
|
|
- # <yourObjectName>由包含文件后缀,不包含Bucket名称组成的Object完整路径,例如abc/efg/123.jpg。
|
|
|
- # <yourLocalFile>由本地文件路径加文件名包括后缀组成,例如/users/local/myfile.txt。
|
|
|
- bucket.get_object_to_file(video_path, video_local_path)
|
|
|
+
|
|
|
+ # 下载视频文件到本地
|
|
|
+ video_url = cdn_domain + video_path
|
|
|
+ download_file(video_url, video_local_path)
|
|
|
|
|
|
# m3u8文件,需下载所有ts文件
|
|
|
if video_filename.split('.')[-1] == 'm3u8':
|
|
@@ -46,9 +52,9 @@ def download_video_from_oss(video_id, video_path, download_folder):
|
|
|
line = line.strip()
|
|
|
print(line)
|
|
|
if line[-3:] == '.ts':
|
|
|
- ts_path = os.path.join(root_path, line)
|
|
|
+ ts_url = cdn_domain + os.path.join(root_path, line)
|
|
|
ts_local_path = os.path.join(video_local_dir, line)
|
|
|
- bucket.get_object_to_file(ts_path, ts_local_path)
|
|
|
+ download_file(ts_url, ts_local_path)
|
|
|
print(f"{video_id} download end!")
|
|
|
except:
|
|
|
print(f"{video_id} download fail!")
|
|
@@ -79,6 +85,7 @@ def timer_check():
|
|
|
try:
|
|
|
project = config_.DAILY_VIDEO['project']
|
|
|
table = config_.DAILY_VIDEO['table']
|
|
|
+ # 昨天
|
|
|
now_date = datetime.datetime.today()
|
|
|
print(f"now_date: {datetime.datetime.strftime(now_date, '%Y%m%d')}")
|
|
|
dt = datetime.datetime.strftime(
|
|
@@ -99,4 +106,19 @@ def timer_check():
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
-    timer_check()
+    # Download every video listed in past_videos.xlsx instead of running
+    # the timer_check() entry point.
+    feature_df = getVideoInfoInXlxs('past_videos.xlsx')
+    download_folder = 'videos'
+
+    def report_error(exc):
+        """Print an exception that escaped a pool worker."""
+        print(f"download task failed: {exc!r}")
+
+    pool = multiprocessing.Pool(processes=6)
+    # Walk both columns in lockstep rather than re-filtering the whole
+    # frame once per video id.
+    rows = zip(feature_df['videoid'].to_list(),
+               feature_df['video_path'].to_list())
+    for video_id, raw_path in rows:
+        # A missing cell may not be a string (presumably NaN if this is a
+        # pandas frame -- TODO confirm), and would crash on .strip().
+        if not isinstance(raw_path, str):
+            print(f"{video_id} has no usable video_path, skipped")
+            continue
+        video_path = raw_path.strip().replace(' ', '')
+        if not video_path:
+            print(f"{video_id} has no usable video_path, skipped")
+            continue
+        print(video_id, video_path)
+        pool.apply_async(
+            func=download_video_from_oss,
+            args=(video_id, video_path, download_folder),
+            # Without this, an exception raised in a worker is dropped
+            # because the AsyncResult is never inspected.
+            error_callback=report_error,
+        )
+    pool.close()
+    pool.join()
|