Explorar el Código

增加快手品类启动脚本

zhangyong hace 10 meses
padre
commit
48d16805fb
Se han modificado 4 ficheros con 249 adiciones y 0 borrados
  1. 10 0
      common/sql_help.py
  2. 169 0
      data_channel/qpl_ks.py
  3. 68 0
      job_pl_ks.py
  4. 2 0
      video_rewriting/video_processor.py

+ 10 - 0
common/sql_help.py

@@ -112,3 +112,13 @@ class sqlCollect():
             sql=sql
         )
         return res
+
+    @classmethod
+    def insert_ks_qpl_data(cls, user_id: str, star_id: str, name, kwai_id: str, gender: str, fans_number: str,
+                       profile_id: str, star_tag_str: str, industry_tag_str: str, photo_expect_play: str, photo_expect_cpm: str, photo_interaction_rate: str,
+                       photo_complete_play_rate: str, fans_increase_num: str, fans_increase_rate: str,):
+        insert_sql = f"""INSERT INTO ks_qpl_data (user_id, star_id, name, kwai_id, gender, fans_number, profile_id, star_tag_str, industry_tag_str, photo_expect_play, photo_expect_cpm, photo_interaction_rate, photo_complete_play_rate, fans_increase_num, fans_increase_rate) values ("{user_id}", "{star_id}", "{name}", "{kwai_id}", "{gender}", "{fans_number}", "{profile_id}", "{star_tag_str}", "{industry_tag_str}", "{photo_expect_play}", "{photo_expect_cpm}", "{photo_interaction_rate}", "{photo_complete_play_rate}", "{fans_increase_num}", "{fans_increase_rate}")"""
+        res = MysqlHelper.update_values(
+            sql=insert_sql
+        )
+        return res

+ 169 - 0
data_channel/qpl_ks.py

@@ -0,0 +1,169 @@
+import random
+import time
+from datetime import datetime
+
+import requests
+import json
+
+from common import Feishu
+from common.sql_help import sqlCollect
+
+
+class QplKs:
+    @classmethod
+    def ks_data_list(cls):
+        content_type = [{'美食': '673'}, {'短剧': '674'}, {'时尚': '675'}, {'母婴亲子': '676'}, {'生活': '677'},
+                        {'汽车': '678'}, {'旅游': '679'}, {'体育运动': '680'}, {'艺术文化': '682'}, {'房产家居': '683'},
+                        {'健康医疗': '684'}, {'教育培训': '685'}, {'高新数码': '686'}, {'音乐': '687'}, {'舞蹈': '688'}, {'读书': '689'},
+                        {'摄影': '704'},
+                        {'财经投资': '690'}, {'萌宠': '691'}, {'时政资讯': '692'}, {'科学与法律': '693'}, {'三农': '694'}, {'搞笑': '695'},
+                        {'情感': '696'},
+                        {'人文': '697'}, {'星座命理': '699'}, {'奇人异象': '700'}, {'军事': '701'}, {'影视娱乐': '702'},
+                        {'民生资讯': '703'}, {'健身达人': '706'}, {'纪实类': '705'}]
+        # fans_count = [{0: 100000}, {100000: 1000000}, {1000000: 3000000}, {3000000: 5000000}, {5000000: 10000000},
+        #               {10000000: 0}]
+
+
+        fans_count = [{10000000: 0}, {5000000: 10000000}]
+        url = "https://k.kuaishou.com/rest/web/star/list"
+        headers = {
+            'Accept': 'application/json',
+            'Accept-Language': 'zh-CN,zh;q=0.9',
+            'Content-Type': 'application/json',
+            'Cookie': 'did=web_9c6a04a4004fdb7c95a658a56ed275b6; didv=1711519980000; apdid=328ac94f-4040-41fe-a038-b60140291aca99fb22e9862c74736f53d57b666ee53b:1719580529:1; language=zh-CN; ud=2205012540; app_id=ks686235996059836592; expire_time=1800; userId=2574854626; access_token=ChFvYXV0aC5hY2Nlc3NUb2tlbhIw_udZziw8HOh4U0o0WCfzpedDFI8kxpzxIBDWKVcHSLl3CMo-cd3EZ3NiAga2_iwdGhIgQfvkqD5FqqWtQ2y0WGxSDjAiIHiWTCrCAkIo1TCQBj86fzBQzXnHCB_yplzArTKHIMzbKAUwAQ; nc_user_id=CiVhZC5ub3RpZnkuY2VudGVyLm9hdXRoLnVzZXIuaWQuc2VjcmV0EiCvobb+fVYYwtCYKsf+mX20zN6suQFLe2M1LXGeHAlWZBoSVM0G49/nJB/JmO0PwfOBK4qSIiCBBpQGJdpxBHdVNj7YLARdxkJ4HmillU9+D5S9vVlsSigFMAE=; kuaishou.ad.social_st=ChVrdWFpc2hvdS5hZC5zb2NpYWwuc3QSoAEqEbh762nhDKS1eTc-dqLlcbzwfEJSNxogQ2rgbqvb63xgQyz7U0ikGgN_r5hsOCuYQupi5PkEqOzFmB_9gYJnYjjyH50TWQjcHFgv0eVl4Dwh6tVFvz7XWi6xpFJEQ_SUU960P6KVG2cu9uhyMfVRRPbGSHJuQoi08cd1BaPYBkRJmBLbNf2IMB4bZwudF_3DlbbSfi2PYhoNpG0xVdRdGhIFcNIwV0BDDJn-zdzp3B-cqPIiIOO8CYtWIKOg8Bo5mfUyrJzIkN6eZqbKtNIGuQsj3oWuKAUwAQ; kuaishou.ad.social_ph=df0e43b95caa8ff41e6cdb3b0f1b7b93c071; did=web_9c6a04a4004fdb7c95a658a56ed275b6; didv=1711522726000',
+            'Origin': 'https://k.kuaishou.com',
+            'Pragma': 'no-cache',
+            'Referer': 'https://k.kuaishou.com/',
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
+        }
+        for count in fans_count:
+            (fans_min_num, fans_max_num), = count.items()
+            for content in content_type:
+                (content_name, content_id), = content.items()
+                current_page = 1
+                while True:
+                    payload_data = {
+                        "currentPage": current_page,
+                        "pageSize": 20,
+                        "starOrderTag": 3,
+                        "taskType": 1,
+                        "marketingGoal": 1,
+                        "viewerAgeList": [  # 观众画像 50+
+                            {
+                                "from": "0.50",
+                                "to": "1.00",
+                                "queryName": "50+"
+                            }
+                        ],
+                        "userName": "",
+                        "vitalityTags": [  # 活跃度 近期短视频活跃
+                            "668"
+                        ],
+                        "contentTagIdList": [  # 内容类型
+                            {
+                                "id": str(content_id),
+                                "selectFirstLevelId": True
+                            }
+                        ],
+                        # "fansMinNum": 1000000,  # 粉丝数量
+                        # "fansMaxNum": 3000000,  # 粉丝数量
+                    }
+                    if fans_min_num > 0:
+                        payload_data["fansMinNum"] = fans_min_num
+                    if fans_max_num > 0:
+                        payload_data["fansMaxNum"] = fans_max_num
+                    payload = json.dumps(payload_data)
+
+                    time.sleep(random.randint(10, 20))
+                    response = requests.request("POST", url, headers=headers, data=payload)
+                    response = response.json()
+                    print(f"开始扫描{content}")
+
+                    result = response["result"]
+                    if result == 1:
+                        total = response["total"]  # 总条数
+                        if total == 0:
+                            print(f"没有扫描到数据{content}")
+                            break
+                        star_list = response["starList"]
+                        if len(star_list) == 0 or star_list == []:
+                            break
+                        for star in star_list:
+                            print(f"扫描到一条数据{content}")
+                            user_id = star["userId"]
+                            star_id = star["starId"]
+                            name = star["name"]  # 用户名
+                            kwai_id = star["kwaiId"]  # 用户名id
+                            gender = star["gender"]  # 性别
+                            fans_number = star["fansNumber"]  # 粉丝数
+                            profile_id = star["profileId"]  # 主页id
+                            star_tag_str = star["starTagStr"]  # 内容类型1
+                            industry_tag_str = star["industryTagStr"]  # 内容类型2
+                            photo_expect_play = star["photoExpectPlay"]  # 预期播放量
+                            photo_expect_cpm = star["photoExpectCpm"]  # 预期CPM
+                            photo_interaction_rate = star["photoInteractionRate"]  # 互动率
+                            photo_complete_play_rate = star["photoCompletePlayRate"]  # 完播率
+                            fans_increase_num = star["fansIncreaseNum"]  # 粉丝增长量
+                            fans_increase_rate = star["fansIncreaseRate"]  # 粉丝增长率
+                            res = sqlCollect.insert_ks_qpl_data(user_id, star_id, name, kwai_id, gender, fans_number, profile_id, star_tag_str, industry_tag_str, photo_expect_play, photo_expect_cpm, photo_interaction_rate, photo_complete_play_rate, fans_increase_num, fans_increase_rate)
+                            if res == 1:
+                                current_time = datetime.now()
+                                formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                                if fans_min_num == 0:
+                                    fans = f"{fans_max_num}及以下"
+                                if fans_max_num == 0:
+                                    fans = f"{fans_min_num}及以上"
+                                else:
+                                    fans = f"{fans_min_num}~{fans_max_num}"
+                                values = [
+                                    [
+                                        name,
+                                        gender,
+                                        fans_number,
+                                        "https://www.kuaishou.com/profile/"+profile_id,
+                                        star_tag_str,
+                                        industry_tag_str,
+                                        photo_expect_play,
+                                        photo_expect_cpm,
+                                        photo_interaction_rate,
+                                        photo_complete_play_rate,
+                                        fans_increase_num,
+                                        fans_increase_rate,
+                                        fans,
+                                        formatted_time
+                                    ]
+                                ]
+                                Feishu.insert_columns("GjGZsmW2ahaCe4tmzDTc58tVnbe", "4a9d77", "ROWS", 1, 2)
+                                time.sleep(0.5)
+                                Feishu.update_values("GjGZsmW2ahaCe4tmzDTc58tVnbe", "4a9d77", "A2:Z2", values)
+                                current_page += 1
+                                print(f"入库到一条数据{content}")
+                        if total < 20:
+                            break
+                        else:
+                            page = int(total)/20
+                            if current_page > int(page):
+                                break
+
+
+
+
+
+
+
+
+
+
+
+if __name__ == '__main__':
+    QplKs.ks_data_list()
+
+
+
+
+
+
+
+
+
+

+ 68 - 0
job_pl_ks.py

@@ -0,0 +1,68 @@
+import os
+import concurrent.futures
+
+import schedule
+import time
+import threading
+from common import Material
+# 控制读写速度的参数
+from video_rewriting.video_processor import VideoProcessor
+
+MAX_BPS = 120 * 1024 * 1024  # 120MB/s
+MAX_WORKERS = os.cpu_count() * 2  # 线程池最大工作线程数量
+READ_WRITE_CHUNK_SIZE = 1024 * 1024  # 每次读写的块大小 (1MB)
+SLEEP_INTERVAL = READ_WRITE_CHUNK_SIZE / MAX_BPS  # 控制每次读写的延迟时间
+# 全局锁,用于同步读写操作
+lock = threading.Lock()
+
+
+def video_task_start(data):
+    mark = VideoProcessor.main(data)
+    print(f"返回用户名{mark}")
+
+
+
+def controlled_io_operation(data):
+    with lock:
+        start_time = time.time()
+        time.sleep(SLEEP_INTERVAL)
+        end_time = time.time()
+        elapsed_time = end_time - start_time
+        if elapsed_time < SLEEP_INTERVAL:
+            time.sleep(SLEEP_INTERVAL - elapsed_time)
+    video_task_start(data)
+
+
+
+
+def video_start():
+    print("开始执行生成视频脚本.")
+
+    data = Material.feishu_list()
+    data = data[13]
+    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+        futures = {executor.submit(controlled_io_operation, data)}
+        for future in concurrent.futures.as_completed(futures):
+            try:
+                future.result()
+                print("处理结果: 成功")
+            except concurrent.futures.TimeoutError:
+                print("任务超时,已取消.")
+            except Exception as e:
+                print("处理任务时出现异常:", e)
+    print("执行生成视频脚本结束.")
+
+video_start()
+
+
+schedule.every(1).hours.do(video_start)
+# schedule.every(20).minutes.do(video_start)
+
+
+
+
+while True:
+    schedule.run_pending()
+    time.sleep(1)
+
+

+ 2 - 0
video_rewriting/video_processor.py

@@ -176,6 +176,8 @@ class VideoProcessor:
                                 sheet = "b0uLWw"
                             elif name == "视频号单视频":
                                 sheet = "ptgCXW"
+                            elif name == "快手品类账号":
+                                sheet = "ibjoMx"
                             Feishu.insert_columns("ILb4sa0LahddRktnRipcu2vQnLb", sheet, "ROWS", 1, 2)
                             time.sleep(0.5)
                             Feishu.update_values("ILb4sa0LahddRktnRipcu2vQnLb", sheet, "A2:Z2", values)