# qpl_ks.py
  1. import random
  2. import time
  3. from datetime import datetime
  4. import requests
  5. import json
  6. from common import Feishu
  7. from common.sql_help import sqlCollect
  8. class QplKs:
  9. @classmethod
  10. def ks_data_list(cls):
  11. # content_type = [{'美食': '673'}, {'旅游': '679'}, {'音乐': '687'}, {'时政资讯': '692'}, {'健康医疗': '684'},
  12. # {'军事': '701'}, {'教育培训': '685'}, {'生活': '677'}, {'舞蹈': '688'},
  13. # {'母婴亲子': '676'}, {'萌宠': '691'}, {'情感': '696'}, {'搞笑': '695'}, {'人文': '697'},
  14. # {'三农': '694'}, {'科学与法律': '693'}, {'读书': '689'}, {'奇人异象': '700'}, {'民生资讯': '703'},
  15. # {'纪实类': '705'}, {'财经投资': '690'}, {'摄影': '704'}, {'艺术文化': '682'}, {'房产家居': '683'},
  16. # {'短剧': '674'}, {'时尚': '675'}, {'影视娱乐': '702'}, {'体育运动': '680'}, {'健身达人': '706'},
  17. # {'星座命理': '699'}, {'汽车': '678'}, {'高新数码': '686'}]
  18. content_type = [{'旅游': '679'}, {'音乐': '687'}, {'时政资讯': '692'}, {'健康医疗': '684'},
  19. {'军事': '701'}, {'教育培训': '685'}, {'生活': '677'}, {'舞蹈': '688'},
  20. {'母婴亲子': '676'}, {'萌宠': '691'}, {'情感': '696'}, {'搞笑': '695'}, {'人文': '697'},
  21. {'三农': '694'}, {'科学与法律': '693'}, {'读书': '689'}, {'奇人异象': '700'}, {'民生资讯': '703'},
  22. {'纪实类': '705'}, {'财经投资': '690'}, {'摄影': '704'}, {'艺术文化': '682'}, {'房产家居': '683'},
  23. {'短剧': '674'}, {'时尚': '675'}, {'影视娱乐': '702'}, {'体育运动': '680'}, {'健身达人': '706'},
  24. {'星座命理': '699'}, {'汽车': '678'}, {'高新数码': '686'}]
  25. fans_count = [{0: 100000}, {100000: 1000000}, {1000000: 3000000}, {3000000: 5000000}, {5000000: 10000000},
  26. {10000000: 0}]
  27. # fans_count = [{10000000: 0}, {5000000: 10000000}]
  28. url = "https://k.kuaishou.com/rest/web/star/list"
  29. headers = {
  30. 'Accept': 'application/json',
  31. 'Accept-Language': 'zh-CN,zh;q=0.9',
  32. 'Content-Type': 'application/json',
  33. 'Cookie': 'did=web_9c6a04a4004fdb7c95a658a56ed275b6; didv=1711519980000; apdid=328ac94f-4040-41fe-a038-b60140291aca99fb22e9862c74736f53d57b666ee53b:1719580529:1; language=zh-CN; ud=2205012540; app_id=ks686235996059836592; expire_time=1800; userId=2574854626; access_token=ChFvYXV0aC5hY2Nlc3NUb2tlbhIwIRvxysHMF86NQiMYMtw1_s0zM4gAEA9_4VrJixHfjYqD8RBNius0YXow_kWI0B8sGhINVY3cQVhC4YDnv5YNyfZ0IKwiIObPEx_s6MCNTBHJclEO0mnOPHspy1vnwt-9536AWyqYKAUwAQ; nc_user_id=CiVhZC5ub3RpZnkuY2VudGVyLm9hdXRoLnVzZXIuaWQuc2VjcmV0EiAtVd5hI6zlT9/UG3HxzRPGDpAxZul8rkwLeQzpGx4y5xoS0fOAqkTADrggSZ6ZpciI2NO9IiBQFbr8Uk6ApRwM5LcFRtS+AAj/QbpiELCRBW7L200+nCgFMAE=; kuaishou.ad.social_st=ChVrdWFpc2hvdS5hZC5zb2NpYWwuc3QSoAFByWIrb3A2KGuX_eJCqlJa6wQTlc2l7mp_MQN_5RLJ4INIE2O3-v18nVEBafp8VK9sY5-ExW_XMpfak7gT8EoI0ft5snPqXur9Ki4gVWIbK-Z-VkyQFkAaD6qB-VtBsqiUoHEmAI9s1H-1kLMKYTCwBUbPH8TsBbnPQw0Lg8NvSQ2Gl4_DP0uA4ouuPFjKJtcoEs9SEz9VbPFfC7X0iDQFGhIk8-y_Rw9CUpd3ZdyZev2LvtUiIKLFKmL283X9GwjqcIMqjlF3erFhqI1QIA9aK-z1gQh3KAUwAQ; kuaishou.ad.social_ph=8764c8e6b60f8cff08b469a56da3fdbe2482; did=web_9c6a04a4004fdb7c95a658a56ed275b6; didv=1711522726000',
  34. 'Origin': 'https://k.kuaishou.com',
  35. 'Pragma': 'no-cache',
  36. 'Referer': 'https://k.kuaishou.com/',
  37. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
  38. }
  39. for count in fans_count:
  40. (fans_min_num, fans_max_num), = count.items()
  41. for content in content_type:
  42. (content_name, content_id), = content.items()
  43. current_page = 1
  44. while True:
  45. payload_data = {
  46. "currentPage": current_page,
  47. "pageSize": 20,
  48. "starOrderTag": 3,
  49. "taskType": 1,
  50. "marketingGoal": 1,
  51. "viewerAgeList": [ # 观众画像 50+
  52. {
  53. "from": "0.50",
  54. "to": "1.00",
  55. "queryName": "50+"
  56. }
  57. ],
  58. "userName": "",
  59. "vitalityTags": [ # 活跃度 近期短视频活跃
  60. "668"
  61. ],
  62. "contentTagIdList": [ # 内容类型
  63. {
  64. "id": str(content_id),
  65. "selectFirstLevelId": True
  66. }
  67. ],
  68. # "starTagIds": [ # 高调性
  69. # 13
  70. # ]
  71. # "fansMinNum": 1000000, # 粉丝数量
  72. # "fansMaxNum": 3000000, # 粉丝数量
  73. }
  74. if fans_min_num > 0:
  75. payload_data["fansMinNum"] = fans_min_num
  76. if fans_max_num > 0:
  77. payload_data["fansMaxNum"] = fans_max_num
  78. payload = json.dumps(payload_data)
  79. time.sleep(random.randint(10, 20))
  80. response = requests.request("POST", url, headers=headers, data=payload)
  81. response = response.json()
  82. print(f"开始扫描{content}")
  83. result = response["result"]
  84. if result == 1:
  85. total = response["total"] # 总条数
  86. if total == 0:
  87. print(f"没有扫描到数据{content}")
  88. break
  89. star_list = response["starList"]
  90. if len(star_list) == 0 or star_list == []:
  91. break
  92. for star in star_list:
  93. print(f"扫描到一条数据{content}")
  94. user_id = star["userId"]
  95. star_id = star["starId"]
  96. name = star["name"] # 用户名
  97. kwai_id = star["kwaiId"] # 用户名id
  98. gender = star["gender"] # 性别
  99. fans_number = star["fansNumber"] # 粉丝数
  100. profile_id = star["profileId"] # 主页id
  101. star_tag_str = star["starTagStr"] # 内容类型1
  102. industry_tag_str = star["industryTagStr"] # 内容类型2
  103. photo_expect_play = star["photoExpectPlay"] # 预期播放量
  104. photo_expect_cpm = star["photoExpectCpm"] # 预期CPM
  105. photo_interaction_rate = star["photoInteractionRate"] # 互动率
  106. photo_complete_play_rate = star["photoCompletePlayRate"] # 完播率
  107. fans_increase_num = star["fansIncreaseNum"] # 粉丝增长量
  108. fans_increase_rate = star["fansIncreaseRate"] # 粉丝增长率
  109. # res = sqlCollect.insert_ks_qpl_data(user_id, star_id, name, kwai_id, gender, fans_number, profile_id, star_tag_str, industry_tag_str, photo_expect_play, photo_expect_cpm, photo_interaction_rate, photo_complete_play_rate, fans_increase_num, fans_increase_rate)
  110. # if res == 1:
  111. current_time = datetime.now()
  112. formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
  113. if fans_min_num == 0:
  114. fans = f"{fans_max_num}及以下"
  115. if fans_max_num == 0:
  116. fans = f"{fans_min_num}及以上"
  117. else:
  118. fans = f"{fans_min_num}~{fans_max_num}"
  119. values = [
  120. [
  121. name,
  122. gender,
  123. fans_number,
  124. profile_id,
  125. "https://www.kuaishou.com/profile/"+profile_id,
  126. star_tag_str,
  127. industry_tag_str,
  128. photo_expect_play,
  129. photo_expect_cpm,
  130. photo_interaction_rate,
  131. photo_complete_play_rate,
  132. fans_increase_num,
  133. fans_increase_rate,
  134. fans,
  135. formatted_time
  136. ]
  137. ]
  138. Feishu.insert_columns("GjGZsmW2ahaCe4tmzDTc58tVnbe", "COyUUm", "ROWS", 1, 2)
  139. time.sleep(0.5)
  140. Feishu.update_values("GjGZsmW2ahaCe4tmzDTc58tVnbe", "COyUUm", "A2:Z2", values)
  141. print(f"入库到一条数据{content}")
  142. current_page += 1
  143. if total < 20:
  144. break
  145. else:
  146. page = int(total)/20
  147. if current_page > int(page)+1:
  148. break
  149. if __name__ == '__main__':
  150. QplKs.ks_data_list()