sunxy 1 år sedan
förälder
incheckning
7eec0042fa
2 ändrade filer med 21 tillägg och 24 borttagningar
  1. 9 1
      download_videos_task.py
  2. 12 23
      utils.py

+ 9 - 1
download_videos_task.py

@@ -25,6 +25,10 @@ def download_video_from_oss(video_id, video_path, download_folder):
         os.makedirs(video_local_dir)
         video_filename = video_path.split('/')[-1]
         video_local_path = os.path.join(video_local_dir, video_filename)
+        # Skip the download when the video file already exists locally
+        if os.path.exists(video_local_path):
+            print(f"{video_id} already exists!")
+            return
         # 阿里云账号AccessKey拥有所有API的访问权限,风险很高。强烈建议您创建并使用RAM用户进行API访问或日常运维,请登录RAM控制台创建RAM用户。
         # auth = oss2.Auth(access_key_id=config_.ODPS_CONFIG['ACCESSID'], access_key_secret=config_.ODPS_CONFIG['ACCESSKEY'])
         auth = oss2.Auth(access_key_id=config_.OSS_CONFIG['accessKeyId'],
@@ -79,6 +83,7 @@ def timer_check():
     try:
         project = config_.DAILY_VIDEO['project']
         table = config_.DAILY_VIDEO['table']
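+        # NOTE(review): original comment said "yesterday", but now_date below is today's date — confirm which day dt targets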
         now_date = datetime.datetime.today()
         print(f"now_date: {datetime.datetime.strftime(now_date, '%Y%m%d')}")
         dt = datetime.datetime.strftime(
@@ -99,4 +104,7 @@ def timer_check():
 
 
 if __name__ == '__main__':
-    timer_check()
+    # timer_check()
+    download_videos('loghubods', 'vid_daily_top_not_taged', '20240426')
+    download_videos('loghubods', 'vid_daily_top_not_taged', '20240427')
+    download_videos('loghubods', 'vid_daily_top_not_taged', '20240428')

+ 12 - 23
utils.py

@@ -29,10 +29,6 @@ def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=
         secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
         project=project,
         endpoint=config_.ODPS_CONFIG['ENDPOINT'],
-        connect_timeout=connect_timeout,
-        read_timeout=read_timeout,
-        pool_maxsize=pool_maxsize,
-        pool_connections=pool_connections
     )
     records = odps.read_table(name=table, partition='dt=%s' % date)
     return records
@@ -68,11 +64,7 @@ def check_table_partition_exits(date, project, table, connect_timeout=3000, read
         access_id=config_.ODPS_CONFIG['ACCESSID'],
         secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
         project=project,
-        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
-        connect_timeout=connect_timeout,
-        read_timeout=read_timeout,
-        pool_maxsize=pool_maxsize,
-        pool_connections=pool_connections
+        endpoint=config_.ODPS_CONFIG['ENDPOINT']
     )
     t = odps.get_table(name=table)
     return t.exist_partition(partition_spec=f'dt={date}')
@@ -84,15 +76,12 @@ def data_check(project, table, dt):
         access_id=config_.ODPS_CONFIG['ACCESSID'],
         secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
         project=project,
-        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
-        connect_timeout=3000,
-        read_timeout=500000,
-        pool_maxsize=1000,
-        pool_connections=1000
+        endpoint=config_.ODPS_CONFIG['ENDPOINT']
     )
 
     try:
-        check_res = check_table_partition_exits(date=dt, project=project, table=table)
+        check_res = check_table_partition_exits(
+            date=dt, project=project, table=table)
         if check_res:
             sql = f'select * from {project}.{table} where dt = {dt}'
             with odps.execute_sql(sql=sql).open_reader() as reader:
@@ -113,7 +102,8 @@ def request_post(request_url, headers, request_data):
     :return: res_data json格式
     """
     try:
-        response = requests.post(url=request_url, json=request_data, headers=headers)
+        response = requests.post(
+            url=request_url, json=request_data, headers=headers)
         # print(response)
         if response.status_code == 200:
             res_data = json.loads(response.text)
@@ -121,7 +111,8 @@ def request_post(request_url, headers, request_data):
         else:
             return None
     except Exception as e:
-        log_.error('url: {}, exception: {}, traceback: {}'.format(request_url, e, traceback.format_exc()))
+        log_.error('url: {}, exception: {}, traceback: {}'.format(
+            request_url, e, traceback.format_exc()))
         return None
 
 
@@ -134,14 +125,16 @@ def request_get(request_url, headers, params=None):
     :return: res_data json格式
     """
     try:
-        response = requests.get(url=request_url, headers=headers, params=params)
+        response = requests.get(
+            url=request_url, headers=headers, params=params)
         if response.status_code == 200:
             res_data = json.loads(response.text)
             return res_data
         else:
             return None
     except Exception as e:
-        log_.error('url: {}, exception: {}, traceback: {}'.format(request_url, e, traceback.format_exc()))
+        log_.error('url: {}, exception: {}, traceback: {}'.format(
+            request_url, e, traceback.format_exc()))
         return None
 
 
@@ -173,9 +166,6 @@ def asr_validity_discrimination(text):
     return True
 
 
-
-
-
 if __name__ == '__main__':
     text = """现场和电视机前的观众朋友,大家晚上好。
 这里是非常说明的访谈现场,
@@ -913,4 +903,3 @@ Haha哈哈那个。
 他还是。"""
     res = asr_validity_discrimination(text=text)
     print(res)
-