sunxy 1 year ago
parent
commit
f5527af645
8 changed files with 41 additions and 10 deletions
  1. 1 1
      ODPSQueryUtil.py
  2. 1 1
      ai_tag_task.py
  3. 4 4
      ai_tag_task.sh
  4. BIN
      archive.tar.gz
  5. BIN
      asr_res.tar.gz
  6. 2 2
      asr_task.sh
  7. 31 0
      delete_videos.sh
  8. 2 2
      download_videos_task.py

+ 1 - 1
ODPSQueryUtil.py

@@ -15,7 +15,7 @@ odps = ODPS(
 
 def query_videos(start_idx, limit):
     # 查询视频标题的表现(从阿里云odps中查询)
-    sql = f"SELECT DISTINCT a.videoid, a.title, transed_video_path AS video_path FROM loghubods.video_return_top_500 a LEFT JOIN videoods.dim_video b ON      a.videoid = b.videoid LEFT JOIN videoods.wx_video c ON      a.videoid = c.id WHERE a.dt >= 20230101 ORDER BY videoid LIMIT {start_idx}, {limit};"
+    sql = f"SELECT DISTINCT a.videoid, a.title, transed_video_path AS video_path FROM loghubods.video_return_top_500 a LEFT JOIN videoods.dim_video b ON      a.videoid = b.videoid LEFT JOIN videoods.wx_video c ON      a.videoid = c.id WHERE a.dt >= 20231001 ORDER BY videoid LIMIT {start_idx}, {limit};"
     result = []
     with odps.execute_sql(sql).open_reader() as reader:
         for record in reader:

+ 1 - 1
ai_tag_task.py

@@ -267,7 +267,7 @@ def timer_check():
 if __name__ == '__main__':
     # timer_check()
     size = 10000
-    for i in range(0, 2000, size):
+    for i in range(0, 10000, size):
         print(f"query_videos start i = {i} ...")
         records = ODPSQueryUtil.query_videos(i, size)
         if records is None or len(records) == 0:

+ 4 - 4
ai_tag_task.sh

@@ -1,9 +1,9 @@
 ps -ef | grep ai_tag_task.py | grep -v grep | awk '{print $2}' | xargs kill -9
 
-cd /data/aigc-test
+# cd /data/aigc-test
 
-source activate whisper
+# source activate whisper
 
-python ai_tag_task.py
+nohup python ai_tag_task.py > logs/ai_tag_task.log 2>&1 &
 
-conda deactivate
+# conda deactivate

BIN
archive.tar.gz


BIN
asr_res.tar.gz


+ 2 - 2
asr_task.sh

@@ -3,8 +3,8 @@ ps -ef | grep asr_task.py | grep -v grep | awk '{print $2}' | xargs kill -9
 rm -r asr_res/
 
 # source activate aigc-test
+nohup env CUDA_VISIBLE_DEVICES=0 python asr_task.py 0 > logs/asr_task_0.log 2>&1 &
 
-CUDA_VISIBLE_DEVICES=0 python asr_task.py 0 > logs/asr_task_0.log &
-CUDA_VISIBLE_DEVICES=1 python asr_task.py 1 > logs/asr_task_1.log
+nohup env CUDA_VISIBLE_DEVICES=1 python asr_task.py 1 > logs/asr_task_1.log 2>&1 &
 
 # conda deactivate

+ 31 - 0
delete_videos.sh

@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# Delete video directories whose ASR result file has already been written.
+# A video counts as done when "$asr_dir/<id>.txt" exists and is non-empty.
+# All paths are relative, so they resolve against the current working directory.
+# Video roots to search, and the directory holding the ASR result files:
+videos_dirs=("videos_0" "videos_1")
+asr_dir="asr_res"
+
+# Walk every .txt file in the ASR result directory.
+for file in "$asr_dir"/*.txt; do
+  # -s is true only for an existing file larger than zero bytes, so this skips
+  # empty results and also the unexpanded pattern left when nothing matches
+  # (running stat on that literal path would fail and break a numeric test).
+  [ -s "$file" ] || continue
+
+  # The file name without its .txt suffix is the video ID.
+  id=$(basename "$file" .txt)
+
+  # Look for a directory named after this ID under each video root.
+  for videos_dir in "${videos_dirs[@]}"; do
+    target_dir="$videos_dir/$id"
+
+    # Remove the directory only when it is present, logging it first.
+    if [ -d "$target_dir" ]; then
+      echo "删除目录:$target_dir"
+      rm -rf "$target_dir"
+    fi
+  done
+done
+

+ 2 - 2
download_videos_task.py

@@ -100,8 +100,8 @@ def timer_check():
 
 if __name__ == '__main__':
     # timer_check()
-    size = 2000
-    for i in range(0, 1000, size):
+    size = 10000
+    for i in range(0, 10000, size):
         print(f"query_videos start i = {i} ...")
         records = ODPSQueryUtil.query_videos(i, size)
         if records is None or len(records) == 0: