|  | @@ -1,9 +1,9 @@
 | 
	
		
			
				|  |  |  """
 | 
	
		
			
				|  |  |  @author: luojunhui
 | 
	
		
			
				|  |  | -todo: 加上多进程锁
 | 
	
		
			
				|  |  |  """
 | 
	
		
			
				|  |  |  import os
 | 
	
		
			
				|  |  |  import time
 | 
	
		
			
				|  |  | +import traceback
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  import requests
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -11,13 +11,23 @@ from pymysql.cursors import DictCursor
 | 
	
		
			
				|  |  |  from tqdm import tqdm
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  from applications.api import GoogleAIAPI
 | 
	
		
			
				|  |  | +from applications.const import VideoToTextConst
 | 
	
		
			
				|  |  |  from applications.db import DatabaseConnector
 | 
	
		
			
				|  |  |  from config import long_articles_config
 | 
	
		
			
				|  |  | +from config import apolloConfig
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  # 办公室网络调试需要打开代理
 | 
	
		
			
				|  |  |  # os.environ["HTTP_PROXY"] = "http://192.168.100.20:1087"
 | 
	
		
			
				|  |  |  # os.environ["HTTPS_PROXY"] = "http://192.168.100.20:1087"
 | 
	
		
			
				|  |  |  
 | 
	
		
			
# Holder of the status codes and sleep interval referenced throughout this
# module (e.g. const.VIDEO_LOCK, const.SLEEP_SECONDS).
const = VideoToTextConst()
# Remote configuration client, pinned to the "prod" environment.
# NOTE(review): created at import time, so importing this module presumably
# contacts the config service — confirm against apolloConfig.
config = apolloConfig(env="prod")

# Cap on the number of videos that may be in the PROCESSING status at once;
# used as the default of upload_video_to_google_ai(max_processing_video_count).
POOL_SIZE = int(config.getConfigValue("video_extract_pool_size"))
# Row limit of the query that selects PROCESSING tasks for text conversion.
BATCH_SIZE = int(config.getConfigValue("video_extract_batch_size"))
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def download_file(pq_vid, video_url):
 | 
	
		
			
				|  |  |      """
 | 
	
	
		
			
				|  | @@ -58,14 +68,14 @@ class GenerateTextFromVideo(object):
 | 
	
		
			
				|  |  |          sql = f"""
 | 
	
		
			
				|  |  |          select article_title, concat('https://rescdn.yishihui.com/', video_oss_path ) as video_url, audit_video_id
 | 
	
		
			
				|  |  |          from publish_single_video_source 
 | 
	
		
			
				|  |  | -        where audit_status = 1 and bad_status = 0 and extract_status = 0
 | 
	
		
			
				|  |  | +        where audit_status = {const.AUDIT_SUCCESS_STATUS} and bad_status = {const.ARTICLE_GOOD_STATUS} and extract_status = {const.EXTRACT_INIT_STATUS}
 | 
	
		
			
				|  |  |          order by id desc;
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          task_list = self.db.fetch(sql, cursor_type=DictCursor)
 | 
	
		
			
				|  |  |          insert_sql = f"""
 | 
	
		
			
				|  |  |          insert ignore into video_content_understanding
 | 
	
		
			
				|  |  |              (pq_vid, video_ori_title, video_oss_path)
 | 
	
		
			
				|  |  | -        values (%s, %s, %s)
 | 
	
		
			
				|  |  | +        values (%s, %s, %s);
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          affected_rows = self.db.save_many(
 | 
	
		
			
				|  |  |              insert_sql,
 | 
	
	
		
			
				|  | @@ -73,7 +83,22 @@ class GenerateTextFromVideo(object):
 | 
	
		
			
				|  |  |          )
 | 
	
		
			
				|  |  |          print(affected_rows)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    def upload_video_to_google_ai(self, max_processing_video_count=20):
 | 
	
		
			
    def update_video_status(self, ori_status, new_status, pq_vid):
        """
        Switch one video row from ``ori_status`` to ``new_status``.

        The WHERE clause matches on ``pq_vid`` and on the expected current
        status, so the row is only modified while it still holds
        ``ori_status``. Callers in this class use the result to decide
        whether a status transition (for example taking or releasing
        ``VIDEO_LOCK``) actually happened.

        :param ori_status: status the row is expected to hold right now
        :param new_status: status to write
        :param pq_vid: video id identifying the row in
            ``video_content_understanding``
        :return: the value returned by ``self.db.save``; callers treat it as
            the affected-row count (falsy when no row was updated)
        """
        # Plain string on purpose: nothing is interpolated into the query,
        # all values travel separately as driver parameters.
        sql = """
            update video_content_understanding
            set status = %s
            WHERE pq_vid = %s and status = %s;
        """
        affected_rows = self.db.save(
            query=sql,
            params=(new_status, pq_vid, ori_status)
        )
        return affected_rows
 | 
	
		
			
				|  |  | +
 | 
	
		
			
    def upload_video_to_google_ai(self, max_processing_video_count=POOL_SIZE):
        """
        Upload pending videos to Google AI, up to the free processing capacity.

        Counts the rows currently in the PROCESSING status and handles at
        most ``max_processing_video_count - count`` rows that are still in
        the INIT status. Each row is first switched INIT -> VIDEO_LOCK via
        ``update_video_status``; rows that cannot be locked are skipped.
        After a successful upload the row is switched VIDEO_LOCK ->
        PROCESSING and the Google file name, file state and expire time are
        stored. When the upload yields no result or raises, the lock is
        released again (VIDEO_LOCK -> INIT) so the row can be retried.

        NOTE(review): the previous docstring also carried a numeric status
        legend (its last entry read "2: processing finished"); confirm the
        full legend against VideoToTextConst before re-adding it.

        :param max_processing_video_count: upper bound on videos in the
            PROCESSING status; defaults to the module-level ``POOL_SIZE``
        :return: number of videos uploaded and marked PROCESSING by this call
        """
        # Count the videos that are currently in the PROCESSING status.
        select_sql = f"""
            select count(1) as processing_count 
            from video_content_understanding 
            where status = {const.VIDEO_UNDERSTAND_PROCESSING_STATUS};
        """
        count = self.db.fetch(select_sql, cursor_type=DictCursor)[0]['processing_count']
        rest_video_count = max_processing_video_count - count
        success_upload_count = 0
        # Only query when there is free capacity: a negative remainder is
        # truthy and would otherwise be rendered as "limit -N".
        if rest_video_count > 0:
            sql = f"""
                select pq_vid, video_oss_path 
                from video_content_understanding 
                where status = {const.VIDEO_UNDERSTAND_INIT_STATUS} 
                limit {rest_video_count};
            """

            task_list = self.db.fetch(sql, cursor_type=DictCursor)
            for task in tqdm(task_list, desc="upload_video_task"):
                # Take the lock; a falsy result means the row is no longer
                # in the INIT status, so leave it alone.
                lock_rows = self.update_video_status(
                    ori_status=const.VIDEO_UNDERSTAND_INIT_STATUS,
                    new_status=const.VIDEO_LOCK,
                    pq_vid=task['pq_vid'],
                )
                if not lock_rows:
                    continue
                try:
                    file_path = download_file(task['pq_vid'], task['video_oss_path'])
                    google_upload_result = self.google_ai_api.upload_file(file_path)
                    if google_upload_result:
                        file_name, file_state, expire_time = google_upload_result
                        # Guarded by status = VIDEO_LOCK so only the row
                        # locked above is promoted to PROCESSING.
                        update_sql = """
                            update video_content_understanding
                            set status = %s, file_name = %s, file_state = %s, file_expire_time = %s
                            where pq_vid = %s and status = %s;
                        """
                        self.db.save(
                            update_sql,
                            params=(
                                const.VIDEO_UNDERSTAND_PROCESSING_STATUS,
                                file_name,
                                file_state,
                                expire_time,
                                task['pq_vid'],
                                const.VIDEO_LOCK
                            )
                        )
                        success_upload_count += 1
                    else:
                        # Upload returned nothing without raising: release
                        # the lock, otherwise the row stays in VIDEO_LOCK
                        # and is never picked up again.
                        self.update_video_status(
                            ori_status=const.VIDEO_LOCK,
                            new_status=const.VIDEO_UNDERSTAND_INIT_STATUS,
                            pq_vid=task['pq_vid'],
                        )

                except Exception as e:
                    print("task upload failed because of {}".format(e))
                    print("trace_back: ", traceback.format_exc())
                    # roll back status
                    self.update_video_status(
                        ori_status=const.VIDEO_LOCK,
                        new_status=const.VIDEO_UNDERSTAND_INIT_STATUS,
                        pq_vid=task['pq_vid'],
                    )

        return success_upload_count
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -121,7 +178,13 @@ class GenerateTextFromVideo(object):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          获取处理视频转文本任务
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  | -        sql = "select pq_vid, file_name from video_content_understanding where status = 1 order by file_expire_time limit 10;"
 | 
	
		
			
				|  |  | +        sql = f"""
 | 
	
		
			
				|  |  | +            select pq_vid, file_name 
 | 
	
		
			
				|  |  | +            from video_content_understanding 
 | 
	
		
			
				|  |  | +            where status = {const.VIDEO_UNDERSTAND_PROCESSING_STATUS} 
 | 
	
		
			
				|  |  | +            order by file_expire_time 
 | 
	
		
			
				|  |  | +            limit {BATCH_SIZE};
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  |          task_list = self.db.fetch(sql, cursor_type=DictCursor)
 | 
	
		
			
				|  |  |          return task_list
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -132,6 +195,14 @@ class GenerateTextFromVideo(object):
 | 
	
		
			
				|  |  |          task_list = self.get_tasks()
 | 
	
		
			
				|  |  |          while task_list:
 | 
	
		
			
				|  |  |              for task in tqdm(task_list, desc="convert video to text"):
 | 
	
		
			
				|  |  | +                # LOCK TASK
 | 
	
		
			
				|  |  | +                lock_row = self.update_video_status(
 | 
	
		
			
				|  |  | +                    ori_status=const.VIDEO_UNDERSTAND_PROCESSING_STATUS,
 | 
	
		
			
				|  |  | +                    new_status=const.VIDEO_LOCK,
 | 
	
		
			
				|  |  | +                    pq_vid=task['pq_vid'],
 | 
	
		
			
				|  |  | +                )
 | 
	
		
			
				|  |  | +                if not lock_row:
 | 
	
		
			
				|  |  | +                    continue
 | 
	
		
			
				|  |  |                  file_name = task['file_name']
 | 
	
		
			
				|  |  |                  video_local_path = "static/{}.mp4".format(task['pq_vid'])
 | 
	
		
			
				|  |  |                  google_file = self.google_ai_api.get_google_file(file_name)
 | 
	
	
		
			
				|  | @@ -147,42 +218,67 @@ class GenerateTextFromVideo(object):
 | 
	
		
			
				|  |  |                                  update_sql = f"""
 | 
	
		
			
				|  |  |                                      update video_content_understanding
 | 
	
		
			
				|  |  |                                      set status = %s, video_text = %s, file_state = %s
 | 
	
		
			
				|  |  | -                                    where pq_vid = %s;
 | 
	
		
			
				|  |  | +                                    where pq_vid = %s and status = %s;
 | 
	
		
			
				|  |  |                                  """
 | 
	
		
			
				|  |  |                                  self.db.save(
 | 
	
		
			
				|  |  |                                      update_sql,
 | 
	
		
			
				|  |  | -                                    params=(2, video_text, state, task['pq_vid'])
 | 
	
		
			
				|  |  | +                                    params=(
 | 
	
		
			
				|  |  | +                                        const.VIDEO_UNDERSTAND_SUCCESS_STATUS,
 | 
	
		
			
				|  |  | +                                        video_text,
 | 
	
		
			
				|  |  | +                                        state,
 | 
	
		
			
				|  |  | +                                        task['pq_vid'],
 | 
	
		
			
				|  |  | +                                        const.VIDEO_LOCK
 | 
	
		
			
				|  |  | +                                    )
 | 
	
		
			
				|  |  |                                  )
 | 
	
		
			
				|  |  | +                                # delete local file and google file
 | 
	
		
			
				|  |  |                                  if os.path.exists(video_local_path):
 | 
	
		
			
				|  |  |                                      os.remove(video_local_path)
 | 
	
		
			
				|  |  | -                                tqdm.write("video transform to text success, delete local file, sleep 1 min...")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                                tqdm.write("video transform to text success, delete local file")
 | 
	
		
			
				|  |  |                                  task_list.remove(task)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |                                  self.google_ai_api.delete_video(file_name)
 | 
	
		
			
				|  |  | -                                print("delete video from google success: {}".format(file_name))
 | 
	
		
			
				|  |  | +                                tqdm.write("delete video from google success: {}".format(file_name))
 | 
	
		
			
				|  |  | +                            else:
 | 
	
		
			
				|  |  | +                                # roll back status
 | 
	
		
			
				|  |  | +                                self.update_video_status(
 | 
	
		
			
				|  |  | +                                    ori_status=const.VIDEO_LOCK,
 | 
	
		
			
				|  |  | +                                    new_status=const.VIDEO_UNDERSTAND_PROCESSING_STATUS,
 | 
	
		
			
				|  |  | +                                    pq_vid=task['pq_vid'],
 | 
	
		
			
				|  |  | +                                )
 | 
	
		
			
				|  |  |                          except Exception as e:
 | 
	
		
			
				|  |  | +                            # roll back status
 | 
	
		
			
				|  |  | +                            self.update_video_status(
 | 
	
		
			
				|  |  | +                                ori_status=const.VIDEO_LOCK,
 | 
	
		
			
				|  |  | +                                new_status=const.VIDEO_UNDERSTAND_PROCESSING_STATUS,
 | 
	
		
			
				|  |  | +                                pq_vid=task['pq_vid'],
 | 
	
		
			
				|  |  | +                            )
 | 
	
		
			
				|  |  |                              tqdm.write(str(e))
 | 
	
		
			
				|  |  |                              continue
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |                      case 'PROCESSING':
 | 
	
		
			
				|  |  |                          tqdm.write("video is still processing")
 | 
	
		
			
				|  |  | -                        continue
 | 
	
		
			
				|  |  | +                        # roll back status
 | 
	
		
			
				|  |  | +                        self.update_video_status(
 | 
	
		
			
				|  |  | +                            ori_status=const.VIDEO_LOCK,
 | 
	
		
			
				|  |  | +                            new_status=const.VIDEO_UNDERSTAND_PROCESSING_STATUS,
 | 
	
		
			
				|  |  | +                            pq_vid=task['pq_vid'],
 | 
	
		
			
				|  |  | +                        )
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |                      case 'FAILED':
 | 
	
		
			
				|  |  | -                        update_sql = f"""
 | 
	
		
			
				|  |  | -                            update video_content_understanding
 | 
	
		
			
				|  |  | -                            set status = %s, file_state = %s
 | 
	
		
			
				|  |  | -                            where pq_vid = %s;
 | 
	
		
			
				|  |  | -                        """
 | 
	
		
			
				|  |  | -                        self.db.save(
 | 
	
		
			
				|  |  | -                            update_sql,
 | 
	
		
			
				|  |  | -                            params=(99, state, task['pq_vid'])
 | 
	
		
			
				|  |  | +                        self.update_video_status(
 | 
	
		
			
				|  |  | +                            ori_status=const.VIDEO_LOCK,
 | 
	
		
			
				|  |  | +                            new_status=const.VIDEO_UNDERSTAND_FAIL_STATUS,
 | 
	
		
			
				|  |  | +                            pq_vid=task['pq_vid']
 | 
	
		
			
				|  |  |                          )
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |                          if os.path.exists(video_local_path):
 | 
	
		
			
				|  |  |                              os.remove(video_local_path)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |                          self.google_ai_api.delete_video(file_name)
 | 
	
		
			
				|  |  |                          tqdm.write("video process failed, delete local file")
 | 
	
		
			
				|  |  | -                        continue
 | 
	
		
			
				|  |  | -                time.sleep(10)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                time.sleep(const.SLEEP_SECONDS)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |              tqdm.write("执行完一轮任务,剩余数量:{}".format(len(task_list)))
 | 
	
		
			
				|  |  | -            time.sleep(60)
 | 
	
		
			
				|  |  | +            time.sleep(const.SLEEP_SECONDS)
 |