Преглед изворног кода

支持抖音/快手平台单账号最高分享内容改造

zhangyong пре 10 месеци
родитељ
комит
3d6a193317
5 измењених фајлова са 321 додато и 110 уклоњено
  1. 22 1
      common/feishu_form.py
  2. 89 0
      data_channel/dy_ls.py
  3. 86 0
      data_channel/ks_ls.py
  4. 112 108
      data_channel/qpl_ks.py
  5. 12 1
      video_rewriting/video_processor.py

+ 22 - 1
common/feishu_form.py

@@ -48,7 +48,10 @@ class Material():
             crop_tool = row[7]
             gg_duration = row[8]
             title = row[9]
-
+            try:
+                ls_number = int(row[10])
+            except:
+                ls_number = None
             def count_items(item, separator):
                 if item and item not in {'None', ''}:
                     return len(item.split(separator))
@@ -76,6 +79,24 @@ class Material():
                     "gg_duration_total": gg_duration,
                 }
                 processed_list.append(number_dict)
+                if ls_number and ls_number not in {'None', ''}:
+                    if channel_id == "抖音":
+                        new_channel_id = "抖音历史"
+                    if channel_id == "快手":
+                        new_channel_id = "快手历史"
+                    number_dict = {
+                        "task_mark": task_mark,
+                        "channel_id": new_channel_id,
+                        "channel_url": channel_url,
+                        "piaoquan_id": piaoquan_id,
+                        "number": ls_number,
+                        "title": title,
+                        "video_share": video_share,
+                        "video_ending": video_ending,
+                        "crop_total": crop_tool,
+                        "gg_duration_total": gg_duration,
+                    }
+                    processed_list.append(number_dict)
             else:
                 return processed_list
 

+ 89 - 0
data_channel/dy_ls.py

@@ -0,0 +1,89 @@
+import random
+import time
+
+import requests
+import json
+
+from common import Common
+from common.sql_help import sqlCollect
+
+
+class DYLS:
+
+    @classmethod
+    def get_dyls_list(cls, task_mark, url_id, number, mark):
+        next_cursor = ""
+        for i in range(50):
+            url = "http://8.217.190.241:8888/crawler/dou_yin/blogger"
+            payload = json.dumps({
+                "account_id": url_id,
+                "source": "抖查查",
+                "cursor": next_cursor
+            })
+            headers = {
+                'Content-Type': 'application/json'
+            }
+            time.sleep(random.randint(1, 5))
+            response = requests.request("POST", url, headers=headers, data=payload)
+            response = response.json()
+            list = []
+            data_all_list = response["data"]
+            has_more = data_all_list["has_more"]
+            next_cursor = str(data_all_list["next_cursor"])
+            try:
+                data_list = data_all_list["data"]
+                for data in data_list:
+                    # comment_count = data["comment_count"]
+                    # download_count = data["download_count"]
+                    share_count = data["share_count"]
+                    good_count = data["good_count"]
+                    # collect_count = data["collect_count"]
+                    duration = data["duration"]
+                    video_id = data["video_id"]
+                    old_title = data["video_desc"]
+                    status = sqlCollect.is_used(task_mark, video_id, mark, "抖音")
+                    if status:
+                        video_percent = '%.2f' % (int(share_count) / int(good_count))
+                        special = float(0.25)
+                        duration = duration / 1000
+                        if int(share_count) < 500 or float(video_percent) < special or int(duration) < 30 or int(duration) > 720:
+                            Common.logger("dy-ls").info(
+                                f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{good_count} ,时长:{int(duration)} ")
+                            continue
+                        video_url, image_url = cls.get_video(video_id)
+                        if video_url:
+                            all_data = {"video_id": video_id, "cover": image_url, "video_url": video_url, "rule": video_percent,
+                                        "old_title": old_title}
+                            list.append(all_data)
+                            if len(list) == int(number):
+                                Common.logger("dy-ls").info(f"获取抖音历史视频总数:{len(list)}\n")
+                                return list
+                if has_more == False:
+                    return list
+            except Exception as exc:
+                Common.logger("dy-ls").info(f"抖音历史数据获取失败:{exc}\n")
+                return list
+
+    @classmethod
+    def get_video(cls, video_id):
+        url = "http://8.217.190.241:8888/crawler/dou_yin/detail"
+
+        payload = json.dumps({
+            "content_id": str(video_id)
+        })
+        headers = {
+            'Content-Type': 'application/json'
+        }
+
+        response = requests.request("POST", url, headers=headers, data=payload)
+        response = response.json()
+        data = response["data"]["data"]
+        video_url = data["video_url_list"][0]["video_url"]
+        image_url = data["image_url_list"][0]["image_url"]
+        return video_url, image_url
+
+
+
+if __name__ == '__main__':
+    # Manual smoke run against the live crawler service: request one
+    # qualifying video for a sample account id.
+    # DYLS.get_video("7314923922602954022")
+    DYLS.get_dyls_list("1","MS4wLjABAAAADUObyc_aoKXnXnV01JEcZMdvU0_ZFvFnVQAU-weztOgHubCQont1aDrDASxWu8B6",1,"1")

+ 86 - 0
data_channel/ks_ls.py

@@ -0,0 +1,86 @@
+import random
+import time
+import requests
+import json
+from common import Common
+from common.sql_help import sqlCollect
+
+class KSLS:
+
+    @classmethod
+    def get_ksls_list(cls, task_mark, url_id, number, mark):
+        url = "http://8.217.190.241:8888/crawler/kuai_shou/blogger"
+        next_cursor = ""
+        for i in range(50):
+            payload = json.dumps({
+                "account_id": "3xk3dc57x4vpkg6",
+                "sort_type": "最热",
+                "cursor": next_cursor
+            })
+            headers = {
+                'Content-Type': 'application/json'
+            }
+            time.sleep(random.randint(1, 5))
+            response = requests.request("POST", url, headers=headers, data=payload)
+            response = response.json()
+            list = []
+            data_all_list = response["data"]
+            has_more = data_all_list["has_more"]
+            next_cursor = str(data_all_list["next_cursor"])
+            try:
+                data_list = data_all_list["data"]
+                for data in data_list:
+                    photo_id = data["photo_id"]
+                    status = sqlCollect.is_used(task_mark, photo_id, mark, "快手")
+                    if status == False:
+                        continue
+                    view_count = data["view_count"]
+                    share_count = data["share_count"]
+                    old_title = data["caption"]  # 标题
+
+                    video_percent = '%.4f' % (int(share_count) / (view_count))
+                    duration = data["duration"]
+                    duration = int(duration)/1000
+                    special = float(0.0005)
+                    if float(video_percent) < special or int(share_count) < 100 or int(duration) < 30 or (duration) > 720:
+                        Common.logger("ks-ls").info(
+                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{photo_id} ,分享:{share_count},浏览{view_count} ,时长:{int(duration)} ")
+                        continue
+                    video_url, image_url = cls.get_video(photo_id)
+                    if video_url:
+                        all_data = {"video_id": photo_id, "cover": image_url, "video_url": video_url,
+                                    "rule": video_percent,
+                                    "old_title": old_title}
+                        list.append(all_data)
+                        if len(list) == int(number):
+                            Common.logger("ks-ls").info(f"获取快手历史视频总数:{len(list)}\n")
+                            return list
+                if has_more == False:
+                    return list
+            except Exception as exc:
+                Common.logger("ks-ls").info(f"抖音历史数据获取失败:{exc}\n")
+                return list
+
+    @classmethod
+    def get_video(cls, video_id):
+        url = "http://8.217.190.241:8888/crawler/kuai_shou/detail"
+
+        payload = json.dumps({
+            "content_id": str(video_id)
+        })
+        headers = {
+            'Content-Type': 'application/json'
+        }
+
+        response = requests.request("POST", url, headers=headers, data=payload)
+        response = response.json()
+        data = response["data"]["data"]
+        video_url = data["video_url_list"][0]["video_url"]
+        image_url = data["image_url_list"][0]["image_url"]
+        return video_url, image_url
+
+
+
+if __name__ == '__main__':
+    # Manual smoke run against the live crawler service: request one
+    # qualifying video for a sample account id.
+    KSLS.get_ksls_list("1","3xk3dc57x4vpkg6",1,"1")

+ 112 - 108
data_channel/qpl_ks.py

@@ -30,120 +30,124 @@ class QplKs:
             'Accept': 'application/json',
             'Accept-Language': 'zh-CN,zh;q=0.9',
             'Content-Type': 'application/json',
-            'Cookie': 'did=web_9c6a04a4004fdb7c95a658a56ed275b6; didv=1711519980000; apdid=328ac94f-4040-41fe-a038-b60140291aca99fb22e9862c74736f53d57b666ee53b:1719580529:1; language=zh-CN; ud=2205012540; app_id=ks686235996059836592; expire_time=1800; userId=2574854626; access_token=ChFvYXV0aC5hY2Nlc3NUb2tlbhIw_udZziw8HOh4U0o0WCfzpedDFI8kxpzxIBDWKVcHSLl3CMo-cd3EZ3NiAga2_iwdGhIgQfvkqD5FqqWtQ2y0WGxSDjAiIHiWTCrCAkIo1TCQBj86fzBQzXnHCB_yplzArTKHIMzbKAUwAQ; nc_user_id=CiVhZC5ub3RpZnkuY2VudGVyLm9hdXRoLnVzZXIuaWQuc2VjcmV0EiCvobb+fVYYwtCYKsf+mX20zN6suQFLe2M1LXGeHAlWZBoSVM0G49/nJB/JmO0PwfOBK4qSIiCBBpQGJdpxBHdVNj7YLARdxkJ4HmillU9+D5S9vVlsSigFMAE=; kuaishou.ad.social_st=ChVrdWFpc2hvdS5hZC5zb2NpYWwuc3QSoAEqEbh762nhDKS1eTc-dqLlcbzwfEJSNxogQ2rgbqvb63xgQyz7U0ikGgN_r5hsOCuYQupi5PkEqOzFmB_9gYJnYjjyH50TWQjcHFgv0eVl4Dwh6tVFvz7XWi6xpFJEQ_SUU960P6KVG2cu9uhyMfVRRPbGSHJuQoi08cd1BaPYBkRJmBLbNf2IMB4bZwudF_3DlbbSfi2PYhoNpG0xVdRdGhIFcNIwV0BDDJn-zdzp3B-cqPIiIOO8CYtWIKOg8Bo5mfUyrJzIkN6eZqbKtNIGuQsj3oWuKAUwAQ; kuaishou.ad.social_ph=df0e43b95caa8ff41e6cdb3b0f1b7b93c071; did=web_9c6a04a4004fdb7c95a658a56ed275b6; didv=1711522726000',
+            'Cookie': 'did=web_9c6a04a4004fdb7c95a658a56ed275b6; didv=1711519980000; apdid=328ac94f-4040-41fe-a038-b60140291aca99fb22e9862c74736f53d57b666ee53b:1719580529:1; language=zh-CN; ud=2205012540; app_id=ks686235996059836592; expire_time=1800; userId=2574854626; access_token=ChFvYXV0aC5hY2Nlc3NUb2tlbhIwIRvxysHMF86NQiMYMtw1_s0zM4gAEA9_4VrJixHfjYqD8RBNius0YXow_kWI0B8sGhINVY3cQVhC4YDnv5YNyfZ0IKwiIObPEx_s6MCNTBHJclEO0mnOPHspy1vnwt-9536AWyqYKAUwAQ; nc_user_id=CiVhZC5ub3RpZnkuY2VudGVyLm9hdXRoLnVzZXIuaWQuc2VjcmV0EiAtVd5hI6zlT9/UG3HxzRPGDpAxZul8rkwLeQzpGx4y5xoS0fOAqkTADrggSZ6ZpciI2NO9IiBQFbr8Uk6ApRwM5LcFRtS+AAj/QbpiELCRBW7L200+nCgFMAE=; kuaishou.ad.social_st=ChVrdWFpc2hvdS5hZC5zb2NpYWwuc3QSoAFByWIrb3A2KGuX_eJCqlJa6wQTlc2l7mp_MQN_5RLJ4INIE2O3-v18nVEBafp8VK9sY5-ExW_XMpfak7gT8EoI0ft5snPqXur9Ki4gVWIbK-Z-VkyQFkAaD6qB-VtBsqiUoHEmAI9s1H-1kLMKYTCwBUbPH8TsBbnPQw0Lg8NvSQ2Gl4_DP0uA4ouuPFjKJtcoEs9SEz9VbPFfC7X0iDQFGhIk8-y_Rw9CUpd3ZdyZev2LvtUiIKLFKmL283X9GwjqcIMqjlF3erFhqI1QIA9aK-z1gQh3KAUwAQ; kuaishou.ad.social_ph=8764c8e6b60f8cff08b469a56da3fdbe2482; did=web_9c6a04a4004fdb7c95a658a56ed275b6; didv=1711522726000',
             'Origin': 'https://k.kuaishou.com',
             'Pragma': 'no-cache',
             'Referer': 'https://k.kuaishou.com/',
             'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
         }
-        for count in fans_count:
-            (fans_min_num, fans_max_num), = count.items()
-            for content in content_type:
-                (content_name, content_id), = content.items()
-                current_page = 1
-                while True:
-                    payload_data = {
-                        "currentPage": current_page,
-                        "pageSize": 20,
-                        "starOrderTag": 3,
-                        "taskType": 1,
-                        "marketingGoal": 1,
-                        "viewerAgeList": [  # 观众画像 50+
-                            {
-                                "from": "0.50",
-                                "to": "1.00",
-                                "queryName": "50+"
-                            }
-                        ],
-                        "userName": "",
-                        "vitalityTags": [  # 活跃度 近期短视频活跃
-                            "668"
-                        ],
-                        "contentTagIdList": [  # 内容类型
-                            {
-                                "id": str(content_id),
-                                "selectFirstLevelId": True
-                            }
-                        ],
-                        # "fansMinNum": 1000000,  # 粉丝数量
-                        # "fansMaxNum": 3000000,  # 粉丝数量
-                    }
-                    if fans_min_num > 0:
-                        payload_data["fansMinNum"] = fans_min_num
-                    if fans_max_num > 0:
-                        payload_data["fansMaxNum"] = fans_max_num
-                    payload = json.dumps(payload_data)
-
-                    time.sleep(random.randint(10, 20))
-                    response = requests.request("POST", url, headers=headers, data=payload)
-                    response = response.json()
-                    print(f"开始扫描{content}")
-
-                    result = response["result"]
-                    if result == 1:
-                        total = response["total"]  # 总条数
-                        if total == 0:
-                            print(f"没有扫描到数据{content}")
+        # for count in fans_count:
+        #     (fans_min_num, fans_max_num), = count.items()
+        for content in content_type:
+            (content_name, content_id), = content.items()
+            current_page = 1
+            while True:
+                payload_data = {
+                    "currentPage": current_page,
+                    "pageSize": 20,
+                    "starOrderTag": 3,
+                    "taskType": 1,
+                    "marketingGoal": 1,
+                    "viewerAgeList": [  # 观众画像 50+
+                        {
+                            "from": "0.50",
+                            "to": "1.00",
+                            "queryName": "50+"
+                        }
+                    ],
+                    "userName": "",
+                    "vitalityTags": [  # 活跃度 近期短视频活跃
+                        "668"
+                    ],
+                    "contentTagIdList": [  # 内容类型
+                        {
+                            "id": str(content_id),
+                            "selectFirstLevelId": True
+                        }
+                    ],
+                    "starTagIds": [  # 高调性
+                        13
+                    ]
+                    # "fansMinNum": 1000000,  # 粉丝数量
+                    # "fansMaxNum": 3000000,  # 粉丝数量
+                }
+                # if fans_min_num > 0:
+                #     payload_data["fansMinNum"] = fans_min_num
+                # if fans_max_num > 0:
+                #     payload_data["fansMaxNum"] = fans_max_num
+                payload = json.dumps(payload_data)
+
+                time.sleep(random.randint(10, 20))
+                response = requests.request("POST", url, headers=headers, data=payload)
+                response = response.json()
+                print(f"开始扫描{content}")
+
+                result = response["result"]
+                if result == 1:
+                    total = response["total"]  # 总条数
+                    if total == 0:
+                        print(f"没有扫描到数据{content}")
+                        break
+                    star_list = response["starList"]
+                    if len(star_list) == 0 or star_list == []:
+                        break
+                    for star in star_list:
+                        print(f"扫描到一条数据{content}")
+                        user_id = star["userId"]
+                        star_id = star["starId"]
+                        name = star["name"]  # 用户名
+                        kwai_id = star["kwaiId"]  # 用户名id
+                        gender = star["gender"]  # 性别
+                        fans_number = star["fansNumber"]  # 粉丝数
+                        profile_id = star["profileId"]  # 主页id
+                        star_tag_str = star["starTagStr"]  # 内容类型1
+                        industry_tag_str = star["industryTagStr"]  # 内容类型2
+                        photo_expect_play = star["photoExpectPlay"]  # 预期播放量
+                        photo_expect_cpm = star["photoExpectCpm"]  # 预期CPM
+                        photo_interaction_rate = star["photoInteractionRate"]  # 互动率
+                        photo_complete_play_rate = star["photoCompletePlayRate"]  # 完播率
+                        fans_increase_num = star["fansIncreaseNum"]  # 粉丝增长量
+                        fans_increase_rate = star["fansIncreaseRate"]  # 粉丝增长率
+                        # res = sqlCollect.insert_ks_qpl_data(user_id, star_id, name, kwai_id, gender, fans_number, profile_id, star_tag_str, industry_tag_str, photo_expect_play, photo_expect_cpm, photo_interaction_rate, photo_complete_play_rate, fans_increase_num, fans_increase_rate)
+                        # if res == 1:
+                        current_time = datetime.now()
+                        formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                            # if fans_min_num == 0:
+                            #     fans = f"{fans_max_num}及以下"
+                            # if fans_max_num == 0:
+                            #     fans = f"{fans_min_num}及以上"
+                            # else:
+                            #     fans = f"{fans_min_num}~{fans_max_num}"
+                        values = [
+                            [
+                                name,
+                                gender,
+                                fans_number,
+                                profile_id,
+                                "https://www.kuaishou.com/profile/"+profile_id,
+                                star_tag_str,
+                                industry_tag_str,
+                                photo_expect_play,
+                                photo_expect_cpm,
+                                photo_interaction_rate,
+                                photo_complete_play_rate,
+                                fans_increase_num,
+                                fans_increase_rate,
+                                # fans,
+                                formatted_time
+                            ]
+                        ]
+                        Feishu.insert_columns("GjGZsmW2ahaCe4tmzDTc58tVnbe", "emf56g", "ROWS", 1, 2)
+                        time.sleep(0.5)
+                        Feishu.update_values("GjGZsmW2ahaCe4tmzDTc58tVnbe", "emf56g", "A2:Z2", values)
+                        print(f"入库到一条数据{content}")
+                    current_page += 1
+                    if total < 20:
+                        break
+                    else:
+                        page = int(total)/20
+                        if current_page > int(page)+1:
                             break
-                        star_list = response["starList"]
-                        if len(star_list) == 0 or star_list == []:
-                            break
-                        for star in star_list:
-                            print(f"扫描到一条数据{content}")
-                            user_id = star["userId"]
-                            star_id = star["starId"]
-                            name = star["name"]  # 用户名
-                            kwai_id = star["kwaiId"]  # 用户名id
-                            gender = star["gender"]  # 性别
-                            fans_number = star["fansNumber"]  # 粉丝数
-                            profile_id = star["profileId"]  # 主页id
-                            star_tag_str = star["starTagStr"]  # 内容类型1
-                            industry_tag_str = star["industryTagStr"]  # 内容类型2
-                            photo_expect_play = star["photoExpectPlay"]  # 预期播放量
-                            photo_expect_cpm = star["photoExpectCpm"]  # 预期CPM
-                            photo_interaction_rate = star["photoInteractionRate"]  # 互动率
-                            photo_complete_play_rate = star["photoCompletePlayRate"]  # 完播率
-                            fans_increase_num = star["fansIncreaseNum"]  # 粉丝增长量
-                            fans_increase_rate = star["fansIncreaseRate"]  # 粉丝增长率
-                            res = sqlCollect.insert_ks_qpl_data(user_id, star_id, name, kwai_id, gender, fans_number, profile_id, star_tag_str, industry_tag_str, photo_expect_play, photo_expect_cpm, photo_interaction_rate, photo_complete_play_rate, fans_increase_num, fans_increase_rate)
-                            if res == 1:
-                                current_time = datetime.now()
-                                formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
-                                if fans_min_num == 0:
-                                    fans = f"{fans_max_num}及以下"
-                                if fans_max_num == 0:
-                                    fans = f"{fans_min_num}及以上"
-                                else:
-                                    fans = f"{fans_min_num}~{fans_max_num}"
-                                values = [
-                                    [
-                                        name,
-                                        gender,
-                                        fans_number,
-                                        "https://www.kuaishou.com/profile/"+profile_id,
-                                        star_tag_str,
-                                        industry_tag_str,
-                                        photo_expect_play,
-                                        photo_expect_cpm,
-                                        photo_interaction_rate,
-                                        photo_complete_play_rate,
-                                        fans_increase_num,
-                                        fans_increase_rate,
-                                        fans,
-                                        formatted_time
-                                    ]
-                                ]
-                                Feishu.insert_columns("GjGZsmW2ahaCe4tmzDTc58tVnbe", "4a9d77", "ROWS", 1, 2)
-                                time.sleep(0.5)
-                                Feishu.update_values("GjGZsmW2ahaCe4tmzDTc58tVnbe", "4a9d77", "A2:Z2", values)
-                                current_page += 1
-                                print(f"入库到一条数据{content}")
-                        if total < 20:
-                            break
-                        else:
-                            page = int(total)/20
-                            if current_page > int(page):
-                                break
 
 
 

+ 12 - 1
video_rewriting/video_processor.py

@@ -13,6 +13,8 @@ from common import Material, Feishu, Common, Oss
 from common.ffmpeg import FFmpeg
 from common.gpt4o_help import GPT4o
 from data_channel.douyin import DY
+from data_channel.dy_ls import DYLS
+from data_channel.ks_ls import KSLS
 from data_channel.kuaishou import KS
 from data_channel.kuaishouchuangzuozhe import KsFeedVideo
 from data_channel.piaoquan import PQ
@@ -205,6 +207,10 @@ class VideoProcessor:
             return KsFeedVideo.get_data()
         elif channel_id == "视频号单视频":
             return SPHDD.get_sphdd_data(url)
+        elif channel_id == "抖音历史":
+            return DYLS.get_dyls_list(task_mark, url, number, mark)
+        elif channel_id == "快手历史":
+            return KSLS.get_ksls_list(task_mark, url, number, mark)
 
     @classmethod
     def generate_title(cls, video, title):
@@ -364,6 +370,10 @@ class VideoProcessor:
                 formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
                 sqlCollect.insert_machine_making_data(name, task_mark, channel_id, url, v_id, piaoquan_id, new_title, code,
                                                       formatted_time, old_title, oss_object_key)
+                if channel_id == "快手历史" or channel_id == "抖音历史":
+                    explain = "历史爆款"
+                else:
+                    explain = "新供给"
                 values = [
                     [
                         name,
@@ -377,7 +387,8 @@ class VideoProcessor:
                         new_title,
                         str(code),
                         formatted_time,
-                        str(rule)
+                        str(rule),
+                        explain
                     ]
                 ]
                 return values, code