10 kuukautta sitten · 73ae6e7309
--- a/common/aliyun_oss.py
+++ b/common/aliyun_oss.py
@@ -99,6 +99,19 @@ class Oss():
 
				         else:
			
 
				             return video_path
			
 
				 
			
 
				+    @classmethod
			
 
				+    def download_sph_ls(cls, video_url, video_path_url, v_id):  # Fetch the OSS object keyed by video_url into a local file built from video_path_url + v_id; returns that local path.
			
 
				+        if "jpg" in video_url:  # NOTE(review): substring match anywhere in the key, not an extension check
			
 
				+            video_path = video_path_url + str(v_id) + '.jpg'  # plain concatenation: no path separator is inserted between prefix and id
			
 
				+        else:  # every key that does not contain "jpg" is saved with an .mp4 suffix
			
 
				+            video_path = video_path_url + str(v_id) + '.mp4'
			
 
				+        auth = oss2.Auth(OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET)  # OSS_* names are defined outside this hunk
			
 
				+        bucket = oss2.Bucket(auth, OSS_BUCKET_ENDPOINT, OSS_BUCKET_NAME)
			
 
				+        # Download the single object stored under key video_url to the local file video_path (no object listing happens here).
			
 
				+        bucket.get_object_to_file(video_url, video_path)
			
 
				+        time.sleep(5)  # NOTE(review): unconditional 5 s pause after the fetch; the reason is not visible in this hunk
			
 
				+        return video_path  # local file path (string), returned whether or not the caller later finds the file on disk
			
 
				 
			
 
				 
			
 
				-
			
 
				+if __name__ == '__main__':
			
 
				+    Oss.download_sph_ls('channel/video/sph/14374775553517295881.jpg','asa','1')  # manual check: key contains "jpg", so the local target is 'asa' + '1' + '.jpg'
			
--- a/common/sql_help.py
+++ b/common/sql_help.py
@@ -127,7 +127,7 @@ class sqlCollect():
 
				     """获取视频号历史数据"""
			
 
				     @classmethod
			
 
				     def sph_data_info_list(cls, user):
			
 
				-        sql = """SELECT video_id, video_title, share_cnt, like_cnt, oss_url, oss_cover  FROM sph_data_info WHERE nick_name = %s  ORDER BY share_cnt DESC"""
			
 
				+        sql = """SELECT video_id, video_title, share_cnt, like_cnt, oss_url, oss_cover, duration FROM sph_data_info WHERE nick_name = %s  ORDER BY share_cnt DESC"""
			
 
				         data = MysqlHelper.get_values(sql, (user))
			
 
				         if data:
			
 
				             return data
			
--- a/data_channel/sph_crawling_data.py
+++ b/data_channel/sph_crawling_data.py
@@ -27,6 +27,7 @@ class SphHistory:
 
				             url = "http://61.48.133.26:30001/FinderGetUpMasterNextPage"
			
 
				             last_buffer = ""
			
 
				             try:
			
 
				+                count = 1
			
 
				                 while True:
			
 
				                     headers = {
			
 
				                         'Content-Type': 'application/json'
			
@@ -38,6 +39,8 @@ class SphHistory:
 
				 
			
 
				                     response = requests.request("POST", url, headers=headers, data=payload)
			
 
				                     time.sleep(random.randint(1, 5))
			
 
				+                    Common.logger("sph_crawling").info(f"{user}获取第{count}页视频")
			
 
				+                    count += 1
			
 
				                     if response.text == "" or response.text == None:
			
 
				                         break
			
 
				                     res_json = response.json()
			
@@ -53,56 +56,60 @@ class SphHistory:
 
				                     if not res_json["UpMasterHomePage"]:
			
 
				                         break
			
 
				                     last_buffer = res_json.get('last_buffer')
			
 
				-                    for obj in res_json["UpMasterHomePage"]:
			
 
				-                        Common.logger("sph_crawling").info(f"{user}扫描到一条数据")
			
 
				-                        objectId = obj['objectId']
			
 
				-                        object_id = sqlCollect.sph_data_info_v_id(objectId, "视频号")
			
 
				-                        if object_id:
			
 
				-                            continue
			
 
				-                        objectNonceId = obj['objectNonceId']
			
 
				-                        url = "http://61.48.133.26:30001/GetFinderDownloadAddress"
			
 
				-                        payload = json.dumps({
			
 
				-                            "objectId": objectId,
			
 
				-                            "objectNonceId": objectNonceId
			
 
				-                        })
			
 
				-                        headers = {
			
 
				-                            'Content-Type': 'text/plain'
			
 
				-                        }
			
 
				-                        response = requests.request("POST", url, headers=headers, data=payload)
			
 
				-                        time.sleep(random.randint(0, 1))
			
 
				-                        video_obj = response.json()
			
 
				-                        video_url = video_obj.get('DownloadAddress')
			
 
				-                        duration = dataHelp.video_duration(video_url)
			
 
				-                        cover = video_obj.get('thumb_url')
			
 
				-                        if len(video_url) == 0:
			
 
				-                            continue
			
 
				-                        v_id = f"sph/{objectId}"
			
 
				-                        try:
			
 
				-                            Common.logger("sph_crawling").info(f"{user}视频ID:{objectId}，视频链接：{video_url}开始发送oss")
			
 
				-                            oss_video_key = Oss.channel_upload_oss(video_url, v_id)  # 视频发送OSS
			
 
				-                            oss_video_key = oss_video_key.get("oss_object_key")
			
 
				-                            Common.logger("sph_crawling").info(f"{user}视频发送oss成功，视频oss地址{oss_video_key}")
			
 
				-                            Common.logger("sph_crawling").info(f"{user}视频ID:{objectId}，封面链接：{cover}开始发送oss")
			
 
				-                            oss_cover_key = Oss.channel_upload_oss(cover, f"sph/{objectId}.jpg")  # 视频发送OSS
			
 
				-                            oss_cover_key = oss_cover_key.get("oss_object_key")
			
 
				-                            Common.logger("sph_crawling").info(f"{user}封面发送oss成功，封面oss地址{oss_video_key}")
			
 
				-                            create_time = obj['createtime']  # 发布时间
			
 
				-                        except:
			
 
				-                            continue
			
 
				-                        share_cnt = int(obj['forward_count'])  # 分享
			
 
				-                        like_cnt = int(obj['like_count'])  # 点赞
			
 
				-                        video_title = video_obj.get('title').split("\n")[0].split("#")[0]
			
 
				-                        user_name = obj['username']  # 用户名标示
			
 
				-                        nick_name = obj['nickname']  # 用户名
			
 
				-                        comment_count = obj['comment_count']  # 评论数
			
 
				-                        fav_count = obj['fav_count']  # 大拇指点赞数
			
 
				-                        sqlCollect.sph_data_info('视频号', objectId, video_url, cover, video_title, str(share_cnt), str(like_cnt), oss_video_key, oss_cover_key, nick_name, user_name, comment_count, fav_count, create_time,duration)
			
 
				-                        Common.logger("sph_crawling").info(f"{nick_name}插入数据成功")
			
 
				+                    try:
			
 
				+                        for obj in res_json["UpMasterHomePage"]:
			
 
				+                            Common.logger("sph_crawling").info(f"{user}扫描到一条数据")
			
 
				+                            objectId = obj['objectId']
			
 
				+                            object_id = sqlCollect.sph_data_info_v_id(objectId, "视频号")
			
 
				+                            if object_id:
			
 
				+                                continue
			
 
				+                            objectNonceId = obj['objectNonceId']
			
 
				+                            url = "http://61.48.133.26:30001/GetFinderDownloadAddress"
			
 
				+                            payload = json.dumps({
			
 
				+                                "objectId": objectId,
			
 
				+                                "objectNonceId": objectNonceId
			
 
				+                            })
			
 
				+                            headers = {
			
 
				+                                'Content-Type': 'text/plain'
			
 
				+                            }
			
 
				+                            response = requests.request("POST", url, headers=headers, data=payload)
			
 
				+                            time.sleep(random.randint(0, 1))
			
 
				+                            video_obj = response.json()
			
 
				+                            video_url = video_obj.get('DownloadAddress')
			
 
				+                            duration = dataHelp.video_duration(video_url)
			
 
				+                            cover = video_obj.get('thumb_url')
			
 
				+                            if len(video_url) == 0:
			
 
				+                                continue
			
 
				+                            v_id = f"sph/{objectId}"
			
 
				+                            try:
			
 
				+                                Common.logger("sph_crawling").info(f"{user}视频ID:{objectId}，视频链接：{video_url}开始发送oss")
			
 
				+                                oss_video_key = Oss.channel_upload_oss(video_url, v_id)  # 视频发送OSS
			
 
				+                                oss_video_key = oss_video_key.get("oss_object_key")
			
 
				+                                Common.logger("sph_crawling").info(f"{user}视频发送oss成功，视频oss地址{oss_video_key}")
			
 
				+                                Common.logger("sph_crawling").info(f"{user}视频ID:{objectId}，封面链接：{cover}开始发送oss")
			
 
				+                                oss_cover_key = Oss.channel_upload_oss(cover, f"sph/{objectId}.jpg")  # 视频发送OSS
			
 
				+                                oss_cover_key = oss_cover_key.get("oss_object_key")
			
 
				+                                Common.logger("sph_crawling").info(f"{user}封面发送oss成功，封面oss地址{oss_video_key}")
			
 
				+                                create_time = obj['createtime']  # 发布时间
			
 
				+                            except:
			
 
				+                                continue
			
 
				+                            share_cnt = int(obj['forward_count'])  # 分享
			
 
				+                            like_cnt = int(obj['like_count'])  # 点赞
			
 
				+                            video_title = video_obj.get('title').split("\n")[0].split("#")[0]
			
 
				+                            user_name = obj['username']  # 用户名标示
			
 
				+                            nick_name = obj['nickname']  # 用户名
			
 
				+                            comment_count = obj['comment_count']  # 评论数
			
 
				+                            fav_count = obj['fav_count']  # 大拇指点赞数
			
 
				+                            sqlCollect.sph_data_info('视频号', objectId, video_url, cover, video_title, str(share_cnt), str(like_cnt), oss_video_key, oss_cover_key, nick_name, user_name, comment_count, fav_count, create_time,duration)
			
 
				+                            Common.logger("sph_crawling").info(f"{nick_name}插入数据成功")
			
 
				+                    except Exception as e:
			
 
				+                        Common.logger("sph_crawling").info(f"{user}异常，异常信息{e}")
			
 
				+                        continue
			
 
				                 sqlCollect.update_sph_channel_user_status(user)
			
 
				                 Common.logger("sph_crawling").info(f"{user}用户抓取完成")
			
 
				                 count = sqlCollect.sph_data_info_count(user, "视频号")
			
 
				                 text = (
			
 
				-                    f"**{user}抓取完成共抓了{count}条数据**\n"
			
 
				+                    f"**{user}抓取完成:共抓了{count[0]}条数据**\n"
			
 
				                 )
			
 
				                 Feishu.finish_bot(text,
			
 
				                                   "https://open.feishu.cn/open-apis/bot/v2/hook/029fa989-9847-4574-8e1b-5c396e665f16",
			
@@ -134,6 +141,6 @@ class SphHistory:
 
				 
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				-    SphHistory.sph_data_info()
			
 
				-    # count = sqlCollect.sph_data_info_count("郑蓝旗", "视频号")
			
 
				-    # print(count)
			
 
				+    # SphHistory.sph_data_info()
			
 
				+    count = sqlCollect.sph_data_info_count("郑蓝旗", "视频号")
			
 
				+    print(count)
			
--- a/data_channel/sph_ls.py
+++ b/data_channel/sph_ls.py
@@ -1,3 +1,4 @@
 
				+from common import Common
			
 
				 from common.sql_help import sqlCollect
			
 
				 
			
 
				 
			
@@ -10,14 +11,31 @@ class SPHLS:
 
				         if data_list:
			
 
				             for data in data_list:
			
 
				                 video_id = data[0]
			
 
				-                old_title = data[1]
			
 
				-                share_cnt = int(data[2])
			
 
				-                like_cnt = int(data[3])
			
 
				-                oss_url = data[4]
			
 
				-                oss_cover = data[5]
			
 
				-                if share_cnt < 300 or share_cnt < like_cnt or
			
 
				-                all_data = {"video_id": video_id, "cover": oss_cover, "video_url": oss_url, "rule": video_percent,
			
 
				-                            "old_title": old_title}
			
 
				+                status = sqlCollect.is_used(video_id, mark, "视频号")
			
 
				+                if status:
			
 
				+                    old_title = data[1]
			
 
				+                    share_cnt = int(data[2])
			
 
				+                    like_cnt = int(data[3])
			
 
				+                    oss_url = data[4]
			
 
				+                    oss_cover = data[5]
			
 
				+                    duration = int(float(data[6]))
			
 
				+                    Common.logger("sph-ls").info(
			
 
				+                        f"扫描：{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt},时长:{duration}")
			
 
				+                    if share_cnt < 300 or share_cnt < like_cnt or duration < 30 or duration > 720:
			
 
				+                        Common.logger("sph-ls").info(
			
 
				+                            f"任务：{task_mark},用户主页id:{url},视频id{video_id} ,分享:{share_cnt},点赞:{like_cnt} ,时长:{duration} ")
			
 
				+                        continue
			
 
				+
			
 
				+                    all_data = {"video_id": video_id, "cover": oss_cover, "video_url": oss_url, "rule": '',
			
 
				+                                "old_title": old_title}
			
 
				+                    list.append(all_data)
			
 
				+                    if len(list) == int(number):
			
 
				+                        Common.logger("sph-ls").info(f"获取视频号视频总数:{len(list)}\n")
			
 
				+                        return list
			
 
				+            return list
			
 
				+        else:
			
 
				+            Common.logger("sph-ls").info(f"{url}无数据\n")
			
 
				+            return None
			
 
				 
			
 
				 
			
 
				 
			
--- a/video_rewriting/video_processor.py
+++ b/video_rewriting/video_processor.py
@@ -25,6 +25,7 @@ from data_channel.shipinhao import SPH
 
				 
			
 
				 # 读取配置文件
			
 
				 from data_channel.shipinhaodandian import SPHDD
			
 
				+from data_channel.sph_ls import SPHLS
			
 
				 
			
 
				 config = configparser.ConfigParser()
			
 
				 config.read('./config.ini')
			
@@ -280,6 +281,8 @@ class VideoProcessor:
 
				             return DYLS.get_dyls_list(task_mark, url, number, mark)
			
 
				         elif channel_id == "快手历史":
			
 
				             return KSLS.get_ksls_list(task_mark, url, number, mark)
			
 
				+        elif channel_id == "视频号历史":
			
 
				+            return SPHLS.get_sphls_data(task_mark, url, number, mark)
			
 
				 
			
 
				     @classmethod
			
 
				     def generate_title(cls, video, title):
			
@@ -318,12 +321,13 @@ class VideoProcessor:
 
				         if channel_id in ["票圈", "快手创作者版"]:
			
 
				             new_video_path = PQ.download_video(video_url, video_path_url, v_id)
			
 
				             Common.logger(mark).info(f"{channel_id}视频下载成功: {new_video_path}")
			
 
				-
			
 
				+        elif channel_id == "视频号历史":
			
 
				+            new_video_path = PQ.download_video(video_url, video_path_url, v_id)
			
 
				+            Common.logger(mark).info(f"{channel_id}视频下载成功: {new_video_path}")
			
 
				         else:
			
 
				             Common.logger(mark).info(f"视频准备下载")
			
 
				             new_video_path = Oss.download_video_oss(video_url, video_path_url, v_id)
			
 
				             Common.logger(mark).info(f"视频下载成功: {new_video_path}")
			
 
				-
			
 
				         if os.path.isfile(new_video_path):
			
 
				             if crop_total and crop_total != 'None':  # 判断是否需要裁剪
			
 
				                 new_video_path = FFmpeg.video_crop(new_video_path, video_path_url, pw_random_id)
			
@@ -425,7 +429,10 @@ class VideoProcessor:
 
				                 if channel_id == "快手历史":
			
 
				                     jpg = None
			
 
				                 else:
			
 
				-                    jpg_path = PQ.download_video_jpg(cover, video_path_url, v_id)  # 下载视频封面
			
 
				+                    if channel_id == "视频号历史":
			
 
				+                        jpg_path = Oss.download_sph_ls(cover, video_path_url, v_id)
			
 
				+                    else:
			
 
				+                        jpg_path = PQ.download_video_jpg(cover, video_path_url, v_id)  # 下载视频封面
			
 
				                     if os.path.isfile(jpg_path):
			
 
				                         oss_jpg_key = Oss.stitching_fm_upload_oss(jpg_path, oss_id)  # 封面发送OSS
			
 
				                         status = oss_jpg_key.get("status")