zhangyong il y a 8 mois
Parent
commit
73ae6e7309

+ 14 - 1
common/aliyun_oss.py

@@ -99,6 +99,19 @@ class Oss():
         else:
             return video_path
 
+    @classmethod
+    def download_sph_ls(cls, video_url, video_path_url, v_id):
+        """Download one OSS object to a local file and return the local path.
+
+        Args:
+            video_url: Key of the object inside the OSS bucket; despite the
+                name it is also used for cover images (a key containing
+                "jpg" is saved with a .jpg suffix).
+            video_path_url: Prefix of the local destination. It is joined to
+                ``v_id`` by plain string concatenation, so it presumably has
+                to end with a path separator -- TODO confirm against callers.
+            v_id: Identifier used as the local file name (converted with
+                ``str``).
+
+        Returns:
+            The local path ``video_path_url + str(v_id)`` plus a ``.jpg`` or
+            ``.mp4`` suffix.
+
+        No exception handling is done here; errors raised by the OSS client
+        propagate to the caller.
+        """
+        # The suffix is chosen by a substring test: a key containing "jpg"
+        # anywhere (not only as its extension) is saved as .jpg, everything
+        # else as .mp4.
+        if "jpg" in video_url:
+            video_path = video_path_url + str(v_id) + '.jpg'
+        else:
+            video_path = video_path_url + str(v_id) + '.mp4'
+        auth = oss2.Auth(OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET)
+        bucket = oss2.Bucket(auth, OSS_BUCKET_ENDPOINT, OSS_BUCKET_NAME)
+        # Fetch the object stored under key video_url and write it to video_path.
+        bucket.get_object_to_file(video_url, video_path)
+        # NOTE(review): fixed 5-second pause after the download; the reason is
+        # not visible here -- confirm whether it is still needed.
+        time.sleep(5)
+        return video_path
 
 
-
+# Manual smoke test: running this module directly calls download_sph_ls with
+# hard-coded sample arguments, which fetches the sample cover object from the
+# bucket and writes it to the local file 'asa1.jpg'.
+if __name__ == '__main__':
+    Oss.download_sph_ls('channel/video/sph/14374775553517295881.jpg','asa','1')

+ 1 - 1
common/sql_help.py

@@ -127,7 +127,7 @@ class sqlCollect():
     """获取视频号历史数据"""
     @classmethod
     def sph_data_info_list(cls, user):
-        sql = """SELECT video_id, video_title, share_cnt, like_cnt, oss_url, oss_cover  FROM sph_data_info WHERE nick_name = %s  ORDER BY share_cnt DESC"""
+        sql = """SELECT video_id, video_title, share_cnt, like_cnt, oss_url, oss_cover, duration FROM sph_data_info WHERE nick_name = %s  ORDER BY share_cnt DESC"""
         data = MysqlHelper.get_values(sql, (user))
         if data:
             return data

+ 56 - 49
data_channel/sph_crawling_data.py

@@ -27,6 +27,7 @@ class SphHistory:
             url = "http://61.48.133.26:30001/FinderGetUpMasterNextPage"
             last_buffer = ""
             try:
+                count = 1
                 while True:
                     headers = {
                         'Content-Type': 'application/json'
@@ -38,6 +39,8 @@ class SphHistory:
 
                     response = requests.request("POST", url, headers=headers, data=payload)
                     time.sleep(random.randint(1, 5))
+                    Common.logger("sph_crawling").info(f"{user}获取第{count}页视频")
+                    count += 1
                     if response.text == "" or response.text == None:
                         break
                     res_json = response.json()
@@ -53,56 +56,60 @@ class SphHistory:
                     if not res_json["UpMasterHomePage"]:
                         break
                     last_buffer = res_json.get('last_buffer')
-                    for obj in res_json["UpMasterHomePage"]:
-                        Common.logger("sph_crawling").info(f"{user}扫描到一条数据")
-                        objectId = obj['objectId']
-                        object_id = sqlCollect.sph_data_info_v_id(objectId, "视频号")
-                        if object_id:
-                            continue
-                        objectNonceId = obj['objectNonceId']
-                        url = "http://61.48.133.26:30001/GetFinderDownloadAddress"
-                        payload = json.dumps({
-                            "objectId": objectId,
-                            "objectNonceId": objectNonceId
-                        })
-                        headers = {
-                            'Content-Type': 'text/plain'
-                        }
-                        response = requests.request("POST", url, headers=headers, data=payload)
-                        time.sleep(random.randint(0, 1))
-                        video_obj = response.json()
-                        video_url = video_obj.get('DownloadAddress')
-                        duration = dataHelp.video_duration(video_url)
-                        cover = video_obj.get('thumb_url')
-                        if len(video_url) == 0:
-                            continue
-                        v_id = f"sph/{objectId}"
-                        try:
-                            Common.logger("sph_crawling").info(f"{user}视频ID:{objectId},视频链接:{video_url}开始发送oss")
-                            oss_video_key = Oss.channel_upload_oss(video_url, v_id)  # 视频发送OSS
-                            oss_video_key = oss_video_key.get("oss_object_key")
-                            Common.logger("sph_crawling").info(f"{user}视频发送oss成功,视频oss地址{oss_video_key}")
-                            Common.logger("sph_crawling").info(f"{user}视频ID:{objectId},封面链接:{cover}开始发送oss")
-                            oss_cover_key = Oss.channel_upload_oss(cover, f"sph/{objectId}.jpg")  # 视频发送OSS
-                            oss_cover_key = oss_cover_key.get("oss_object_key")
-                            Common.logger("sph_crawling").info(f"{user}封面发送oss成功,封面oss地址{oss_video_key}")
-                            create_time = obj['createtime']  # 发布时间
-                        except:
-                            continue
-                        share_cnt = int(obj['forward_count'])  # 分享
-                        like_cnt = int(obj['like_count'])  # 点赞
-                        video_title = video_obj.get('title').split("\n")[0].split("#")[0]
-                        user_name = obj['username']  # 用户名标示
-                        nick_name = obj['nickname']  # 用户名
-                        comment_count = obj['comment_count']  # 评论数
-                        fav_count = obj['fav_count']  # 大拇指点赞数
-                        sqlCollect.sph_data_info('视频号', objectId, video_url, cover, video_title, str(share_cnt), str(like_cnt), oss_video_key, oss_cover_key, nick_name, user_name, comment_count, fav_count, create_time,duration)
-                        Common.logger("sph_crawling").info(f"{nick_name}插入数据成功")
+                    try:
+                        for obj in res_json["UpMasterHomePage"]:
+                            Common.logger("sph_crawling").info(f"{user}扫描到一条数据")
+                            objectId = obj['objectId']
+                            object_id = sqlCollect.sph_data_info_v_id(objectId, "视频号")
+                            if object_id:
+                                continue
+                            objectNonceId = obj['objectNonceId']
+                            url = "http://61.48.133.26:30001/GetFinderDownloadAddress"
+                            payload = json.dumps({
+                                "objectId": objectId,
+                                "objectNonceId": objectNonceId
+                            })
+                            headers = {
+                                'Content-Type': 'text/plain'
+                            }
+                            response = requests.request("POST", url, headers=headers, data=payload)
+                            time.sleep(random.randint(0, 1))
+                            video_obj = response.json()
+                            video_url = video_obj.get('DownloadAddress')
+                            duration = dataHelp.video_duration(video_url)
+                            cover = video_obj.get('thumb_url')
+                            if len(video_url) == 0:
+                                continue
+                            v_id = f"sph/{objectId}"
+                            try:
+                                Common.logger("sph_crawling").info(f"{user}视频ID:{objectId},视频链接:{video_url}开始发送oss")
+                                oss_video_key = Oss.channel_upload_oss(video_url, v_id)  # 视频发送OSS
+                                oss_video_key = oss_video_key.get("oss_object_key")
+                                Common.logger("sph_crawling").info(f"{user}视频发送oss成功,视频oss地址{oss_video_key}")
+                                Common.logger("sph_crawling").info(f"{user}视频ID:{objectId},封面链接:{cover}开始发送oss")
+                                oss_cover_key = Oss.channel_upload_oss(cover, f"sph/{objectId}.jpg")  # 视频发送OSS
+                                oss_cover_key = oss_cover_key.get("oss_object_key")
+                                Common.logger("sph_crawling").info(f"{user}封面发送oss成功,封面oss地址{oss_video_key}")
+                                create_time = obj['createtime']  # 发布时间
+                            except:
+                                continue
+                            share_cnt = int(obj['forward_count'])  # 分享
+                            like_cnt = int(obj['like_count'])  # 点赞
+                            video_title = video_obj.get('title').split("\n")[0].split("#")[0]
+                            user_name = obj['username']  # 用户名标示
+                            nick_name = obj['nickname']  # 用户名
+                            comment_count = obj['comment_count']  # 评论数
+                            fav_count = obj['fav_count']  # 大拇指点赞数
+                            sqlCollect.sph_data_info('视频号', objectId, video_url, cover, video_title, str(share_cnt), str(like_cnt), oss_video_key, oss_cover_key, nick_name, user_name, comment_count, fav_count, create_time,duration)
+                            Common.logger("sph_crawling").info(f"{nick_name}插入数据成功")
+                    except Exception as e:
+                        Common.logger("sph_crawling").info(f"{user}异常,异常信息{e}")
+                        continue
                 sqlCollect.update_sph_channel_user_status(user)
                 Common.logger("sph_crawling").info(f"{user}用户抓取完成")
                 count = sqlCollect.sph_data_info_count(user, "视频号")
                 text = (
-                    f"**{user}抓取完成共抓了{count}条数据**\n"
+                    f"**{user}抓取完成:共抓了{count[0]}条数据**\n"
                 )
                 Feishu.finish_bot(text,
                                   "https://open.feishu.cn/open-apis/bot/v2/hook/029fa989-9847-4574-8e1b-5c396e665f16",
@@ -134,6 +141,6 @@ class SphHistory:
 
 
 if __name__ == '__main__':
-    SphHistory.sph_data_info()
-    # count = sqlCollect.sph_data_info_count("郑蓝旗", "视频号")
-    # print(count)
+    # SphHistory.sph_data_info()
+    count = sqlCollect.sph_data_info_count("郑蓝旗", "视频号")
+    print(count)

+ 26 - 8
data_channel/sph_ls.py

@@ -1,3 +1,4 @@
+from common import Common
 from common.sql_help import sqlCollect
 
 
@@ -10,14 +11,31 @@ class SPHLS:
         if data_list:
             for data in data_list:
                 video_id = data[0]
-                old_title = data[1]
-                share_cnt = int(data[2])
-                like_cnt = int(data[3])
-                oss_url = data[4]
-                oss_cover = data[5]
-                if share_cnt < 300 or share_cnt < like_cnt or
-                all_data = {"video_id": video_id, "cover": oss_cover, "video_url": oss_url, "rule": video_percent,
-                            "old_title": old_title}
+                status = sqlCollect.is_used(video_id, mark, "视频号")
+                if status:
+                    old_title = data[1]
+                    share_cnt = int(data[2])
+                    like_cnt = int(data[3])
+                    oss_url = data[4]
+                    oss_cover = data[5]
+                    duration = int(float(data[6]))
+                    Common.logger("sph-ls").info(
+                        f"扫描:{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt},时长:{duration}")
+                    if share_cnt < 300 or share_cnt < like_cnt or duration < 30 or duration > 720:
+                        Common.logger("sph-ls").info(
+                            f"任务:{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration} ")
+                        continue
+
+                    all_data = {"video_id": video_id, "cover": oss_cover, "video_url": oss_url, "rule": '',
+                                "old_title": old_title}
+                    list.append(all_data)
+                    if len(list) == int(number):
+                        Common.logger("sph-ls").info(f"获取视频号视频总数:{len(list)}\n")
+                        return list
+            return list
+        else:
+            Common.logger("sph-ls").info(f"{url}无数据\n")
+            return None
 
 
 

+ 10 - 3
video_rewriting/video_processor.py

@@ -25,6 +25,7 @@ from data_channel.shipinhao import SPH
 
 # 读取配置文件
 from data_channel.shipinhaodandian import SPHDD
+from data_channel.sph_ls import SPHLS
 
 config = configparser.ConfigParser()
 config.read('./config.ini')
@@ -280,6 +281,8 @@ class VideoProcessor:
             return DYLS.get_dyls_list(task_mark, url, number, mark)
         elif channel_id == "快手历史":
             return KSLS.get_ksls_list(task_mark, url, number, mark)
+        elif channel_id == "视频号历史":
+            return SPHLS.get_sphls_data(task_mark, url, number, mark)
 
     @classmethod
     def generate_title(cls, video, title):
@@ -318,12 +321,13 @@ class VideoProcessor:
         if channel_id in ["票圈", "快手创作者版"]:
             new_video_path = PQ.download_video(video_url, video_path_url, v_id)
             Common.logger(mark).info(f"{channel_id}视频下载成功: {new_video_path}")
-
+        elif channel_id == "视频号历史":
+            new_video_path = PQ.download_video(video_url, video_path_url, v_id)
+            Common.logger(mark).info(f"{channel_id}视频下载成功: {new_video_path}")
         else:
             Common.logger(mark).info(f"视频准备下载")
             new_video_path = Oss.download_video_oss(video_url, video_path_url, v_id)
             Common.logger(mark).info(f"视频下载成功: {new_video_path}")
-
         if os.path.isfile(new_video_path):
             if crop_total and crop_total != 'None':  # 判断是否需要裁剪
                 new_video_path = FFmpeg.video_crop(new_video_path, video_path_url, pw_random_id)
@@ -425,7 +429,10 @@ class VideoProcessor:
                 if channel_id == "快手历史":
                     jpg = None
                 else:
-                    jpg_path = PQ.download_video_jpg(cover, video_path_url, v_id)  # 下载视频封面
+                    if channel_id == "视频号历史":
+                        jpg_path = Oss.download_sph_ls(cover, video_path_url, v_id)
+                    else:
+                        jpg_path = PQ.download_video_jpg(cover, video_path_url, v_id)  # 下载视频封面
                     if os.path.isfile(jpg_path):
                         oss_jpg_key = Oss.stitching_fm_upload_oss(jpg_path, oss_id)  # 封面发送OSS
                         status = oss_jpg_key.get("status")