sunxy 11 kuukautta sitten
vanhempi
commit
e371361389
4 muutettua tiedostoa jossa 36 lisäystä ja 9 poistoa
  1. 18 0
      ReadXlsxFile.py
  2. 1 1
      ai_tag_task.sh
  3. 17 8
      download_videos_task.py
  4. BIN
      past_videos.xlsx

+ 18 - 0
ReadXlsxFile.py

@@ -0,0 +1,18 @@
+import pandas as pd
+
+# 读取Excel文件
+
+
+def getVideoInfoInXlxs(xlsx_file):
+    df = pd.read_excel(xlsx_file)
+
+    feature_data = []
+    for index, row in df.iterrows():
+        item = {}
+        item['videoid'] = row['videoid']
+        item['title'] = row['title']
+        item['video_path'] = row['video_path']
+        feature_data.append(item)
+
+    feature_df = pd.DataFrame(feature_data)
+    return feature_df

+ 1 - 1
ai_tag_task.sh

@@ -1,6 +1,6 @@
 ps -ef | grep ai_tag_task.py | grep -v grep | awk '{print $2}' | xargs kill -9
 
-# cd /data/aigc-test
+cd /sunxy/aigc-test 
 
 source activate srt
 

+ 17 - 8
download_videos_task.py

@@ -8,6 +8,7 @@ from utils import data_check, get_feature_data
 from config import set_config
 from log import Log
 import ODPSQueryUtil
+from ReadXlsxFile import getVideoInfoInXlxs
 config_ = set_config()
 log_ = Log()
 features = ['videoid', 'title', 'video_path']
@@ -21,14 +22,10 @@ def download_video_from_oss(video_id, video_path, download_folder):
         download_folder = f"{download_folder}_{pid}"
         if not os.path.exists(download_folder):
             os.makedirs(download_folder)
-        video_local_dir = os.path.join(download_folder, video_id)
+        video_local_dir = os.path.join(download_folder, str(video_id))
         os.makedirs(video_local_dir)
         video_filename = video_path.split('/')[-1]
         video_local_path = os.path.join(video_local_dir, video_filename)
-        # 判断文件不存在
-        if os.path.exists(video_local_path):
-            print(f"{video_id} already exists!")
-            return
         # 阿里云账号AccessKey拥有所有API的访问权限,风险很高。强烈建议您创建并使用RAM用户进行API访问或日常运维,请登录RAM控制台创建RAM用户。
         # auth = oss2.Auth(access_key_id=config_.ODPS_CONFIG['ACCESSID'], access_key_secret=config_.ODPS_CONFIG['ACCESSKEY'])
         auth = oss2.Auth(access_key_id=config_.OSS_CONFIG['accessKeyId'],
@@ -105,6 +102,18 @@ def timer_check():
 
 if __name__ == '__main__':
     # timer_check()
-    download_videos('loghubods', 'vid_daily_top_not_taged', '20240426')
-    download_videos('loghubods', 'vid_daily_top_not_taged', '20240427')
-    download_videos('loghubods', 'vid_daily_top_not_taged', '20240428')
+    feature_df = getVideoInfoInXlxs('past_videos.xlsx')
+    download_folder = 'videos'
+    video_id_list = feature_df['videoid'].to_list()
+    pool = multiprocessing.Pool(processes=6)
+    for video_id in video_id_list:
+        video_path = feature_df[feature_df['videoid']
+                                == video_id]['video_path'].values[0].strip()
+        video_path = video_path.replace(' ', '')
+        print(video_id, video_path)
+        pool.apply_async(
+            func=download_video_from_oss,
+            args=(video_id, video_path, download_folder)
+        )
+    pool.close()
+    pool.join()

BIN
past_videos.xlsx