zhangyong 8 ماه پیش
والد
کامیت
c43cdc6353
1 فایلهای تغییر یافته به همراه 42 افزوده شده و 7 حذف شده
  1. 42 7
      data_channel/sph_crawling_data.py

+ 42 - 7
data_channel/sph_crawling_data.py

@@ -8,13 +8,40 @@ import requests
 
 
 from common import Material, Oss, Common
 from common import Material, Oss, Common
 from common.sql_help import sqlCollect
 from common.sql_help import sqlCollect
+from data_channel.piaoquan import PQ
 from data_channel.shipinhao import SPH
 from data_channel.shipinhao import SPH
 class SphHistory:
 class SphHistory:
 
 
+    @classmethod
+    def remove_files(cls, video_path_url):
+        """
+        Delete every file and subdirectory under ``video_path_url``.
+
+        The directory itself is kept. Nothing happens when the path does
+        not exist or is not a directory.
+
+        :param video_path_url: path of the directory to empty.
+        :raises OSError: if a file or directory cannot be removed.
+        """
+        if not os.path.isdir(video_path_url):
+            return
+        # Walk bottom-up so each subdirectory is already empty when
+        # os.rmdir() reaches it; a top-down walk would call os.rmdir()
+        # on directories that still hold files and raise OSError.
+        for root, dirs, files in os.walk(video_path_url, topdown=False):
+            for file_name in files:
+                os.remove(os.path.join(root, file_name))
+            for dir_name in dirs:
+                os.rmdir(os.path.join(root, dir_name))
+
+    @classmethod
+    def create_folders(cls):
+        """
+        Ensure the fixed local working directory exists and return its path.
+
+        :return: the directory path, including the trailing slash.
+        """
+        target_dir = '/root/video_rewriting/path/sph_crawling/'
+        if os.path.exists(target_dir):
+            return target_dir
+        os.makedirs(target_dir)
+        return target_dir
+
     """获取视频号所有内容"""
     """获取视频号所有内容"""
     @classmethod
     @classmethod
     def sph_data_info(cls):
     def sph_data_info(cls):
         user_list = cls.get_sph_user()
         user_list = cls.get_sph_user()
+        video_path_url = cls.create_folders()
         if user_list == None:
         if user_list == None:
             return
             return
         for user in user_list:
         for user in user_list:
@@ -22,6 +49,7 @@ class SphHistory:
             account_id = SPH.get_account_id(user)
             account_id = SPH.get_account_id(user)
             if account_id == False:
             if account_id == False:
                 print(f"{account_id}:没有获取到视频account_id,无法抓取数据")
                 print(f"{account_id}:没有获取到视频account_id,无法抓取数据")
+                continue
             url = "http://61.48.133.26:30001/FinderGetUpMasterNextPage"
             url = "http://61.48.133.26:30001/FinderGetUpMasterNextPage"
             last_buffer = ""
             last_buffer = ""
             try:
             try:
@@ -50,6 +78,7 @@ class SphHistory:
                         break
                         break
                     last_buffer = res_json.get('last_buffer')
                     last_buffer = res_json.get('last_buffer')
                     for obj in res_json["UpMasterHomePage"]:
                     for obj in res_json["UpMasterHomePage"]:
+                        Common.logger("sph_crawling").info(f"{user}扫描到一条数据")
                         objectId = obj['objectId']
                         objectId = obj['objectId']
                         objectNonceId = obj['objectNonceId']
                         objectNonceId = obj['objectNonceId']
                         url = "http://61.48.133.26:30001/GetFinderDownloadAddress"
                         url = "http://61.48.133.26:30001/GetFinderDownloadAddress"
@@ -66,16 +95,19 @@ class SphHistory:
                         video_url = video_obj.get('DownloadAddress')
                         video_url = video_obj.get('DownloadAddress')
                         if len(video_url) == 0:
                         if len(video_url) == 0:
                             continue
                             continue
-                        v_id = f"sph_{objectId}.mp4"
-                        c_id = f"sph_{objectId}.jpg"
-                        oss_video_path = Oss.channel_upload_oss(video_url, v_id)
-                        oss_video_key = oss_video_path.get("oss_object_key")
+                        v_id = f"sph/{objectId}"
+                        oss_video_key = Oss.channel_upload_oss(video_url, v_id)  # 视频发送OSS
+                        oss_video_key = oss_video_key.get("oss_object_key")
                         share_cnt = int(obj['forward_count'])  # 分享
                         share_cnt = int(obj['forward_count'])  # 分享
                         like_cnt = int(obj['like_count'])  # 点赞
                         like_cnt = int(obj['like_count'])  # 点赞
                         video_title = video_obj.get('title').split("\n")[0].split("#")[0]
                         video_title = video_obj.get('title').split("\n")[0].split("#")[0]
                         cover = video_obj.get('thumb_url')
                         cover = video_obj.get('thumb_url')
-                        oss_cover_path = Oss.channel_upload_oss(cover, c_id)
-                        oss_cover_key = oss_cover_path.get("oss_object_key")
+                        jpg_path = PQ.download_video_jpg(cover, video_path_url, v_id)  # 下载视频封面
+                        if os.path.isfile(jpg_path):
+                            oss_jpg_key = Oss.stitching_fm_upload_oss(jpg_path, v_id)  # 封面发送OSS
+                            oss_cover_key = oss_jpg_key.get("oss_object_key")
+                        else:
+                            oss_cover_key = ''
                         Common.logger("sph_crawling").info(f"{user}oss地址:视频{oss_video_key},封面{oss_cover_key}")
                         Common.logger("sph_crawling").info(f"{user}oss地址:视频{oss_video_key},封面{oss_cover_key}")
                         create_time = obj['createtime']  # 发布时间
                         create_time = obj['createtime']  # 发布时间
                         user_name = obj['username']  # 用户名标示
                         user_name = obj['username']  # 用户名标示
@@ -84,8 +116,11 @@ class SphHistory:
                         fav_count = obj['fav_count']  # 大拇指点赞数
                         fav_count = obj['fav_count']  # 大拇指点赞数
                         sqlCollect.sph_data_info('视频号', objectId, video_url, cover, video_title, str(share_cnt), str(like_cnt), oss_video_key, oss_cover_key, nick_name, user_name, comment_count, fav_count, create_time)
                         sqlCollect.sph_data_info('视频号', objectId, video_url, cover, video_title, str(share_cnt), str(like_cnt), oss_video_key, oss_cover_key, nick_name, user_name, comment_count, fav_count, create_time)
                         Common.logger("sph_crawling").info(f"{nick_name}插入数据成功")
                         Common.logger("sph_crawling").info(f"{nick_name}插入数据成功")
+                        cls.remove_files(video_path_url)
                         return "完成"
                         return "完成"
-            except:
+            except Exception as e:
+                Common.logger("sph_crawling").info(f"{user}异常,异常信息{e}")
+                cls.remove_files(video_path_url)
                 continue
                 continue
 
 
     @classmethod
     @classmethod