douyin_author.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. # -*- coding: utf-8 -*-
  2. # @Time: 2023/12/22
  3. import datetime
  4. import os
  5. import random
  6. import sys
  7. import time
  8. from datetime import datetime
  9. import requests
  10. import json
  11. import urllib3
  12. from extract_data.douyin.douyin_author_help import DouYinHelper
  13. sys.path.append(os.getcwd())
  14. from common.aliyun_oss_uploading import Oss
  15. from common.common import Common
  16. from common.material import Material
  17. from common.feishu import Feishu
  18. from common.db import MysqlHelper
  19. from requests.adapters import HTTPAdapter
  20. class douyinAuthor():
  21. """
  22. 获取抖音用户主页id
  23. """
  24. @classmethod
  25. def get_videoUserId(cls, mark):
  26. select_user_sql = f"""select user_id, channel from agc_channel_data where mark = '{mark}' and channel = '抖音' ORDER BY id DESC;"""
  27. user_list = MysqlHelper.get_values(select_user_sql, "prod")
  28. return user_list
  29. """
  30. oss视频地址 存入数据库
  31. """
  32. @classmethod
  33. def insert_videoUrl(cls, video_id, account_id, oss_object_key, mark):
  34. current_time = datetime.now()
  35. formatted_time = current_time.strftime("%Y-%m-%d %H:%M")
  36. insert_sql = f"""INSERT INTO agc_video_url (video_id, account_id, oss_object_key, time , status, mark) values ("{video_id}", "{account_id}", "{oss_object_key}", "{formatted_time}", 1, "{mark}")"""
  37. MysqlHelper.update_values(
  38. sql=insert_sql,
  39. env="prod",
  40. machine="",
  41. )
  42. """
  43. 查询该video_id是否在数据库存在
  44. """
  45. @classmethod
  46. def select_videoUrl_id(cls, video_id, mark):
  47. select_user_sql = f"""select video_id from agc_video_url where video_id='{video_id}' and mark = '{mark}';"""
  48. user_list = MysqlHelper.get_values(select_user_sql, "prod")
  49. if user_list:
  50. return True
  51. else:
  52. return False
  53. """抖音读取数据 将数据存储到oss上"""
  54. @classmethod
  55. def get_videoList(cls, data):
  56. try:
  57. mark = data['mark']
  58. mark_name = data['mark_name']
  59. token = data['token']
  60. feishu_id = data['feishu_id']
  61. channel_id = data['channel'][0]
  62. channel = data['channel'][1]
  63. user_list = Material.insert_user(feishu_id, channel_id, mark, channel)
  64. cookie = Material.get_cookie(feishu_id, token, channel)
  65. if len(user_list) == 0:
  66. return
  67. for account_id in user_list:
  68. Common.logger("douyin").info(f"用户主页ID:{account_id}")
  69. next_cursor = 0
  70. count = 0
  71. exit_flag = False
  72. while True:
  73. if exit_flag:
  74. # 结束 while 循环
  75. break
  76. if next_cursor == None:
  77. break
  78. if count > 5:
  79. continue
  80. time.sleep(random.randint(5, 10))
  81. url = 'https://www.douyin.com/aweme/v1/web/aweme/post/'
  82. headers = {
  83. 'Accept': 'application/json, text/plain, */*',
  84. 'Accept-Language': 'zh-CN,zh;q=0.9',
  85. 'Cache-Control': 'no-cache',
  86. 'Cookie': cookie,
  87. 'Pragma': 'no-cache',
  88. 'Referer': f'https://www.douyin.com/user/{account_id}',
  89. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
  90. 'Chrome/118.0.0.0 Safari/537.36',
  91. }
  92. query = DouYinHelper.get_full_query(ua=headers['User-Agent'], extra_data={
  93. 'sec_user_id': account_id,
  94. 'max_cursor': next_cursor,
  95. 'locate_query': 'false',
  96. 'show_live_replay_strategy': '1',
  97. 'need_time_list': '1',
  98. 'time_list_query': '0',
  99. 'whale_cut_token': '',
  100. 'cut_version': '1',
  101. 'count': '18',
  102. 'publish_video_strategy_type': '2',
  103. })
  104. urllib3.disable_warnings()
  105. s = requests.session()
  106. s.mount('http://', HTTPAdapter(max_retries=3))
  107. s.mount('https://', HTTPAdapter(max_retries=3))
  108. response = requests.request(method='GET', url=url, headers=headers, params=query)
  109. text = response.text
  110. if len(text) == 0:
  111. time.sleep(60)
  112. continue
  113. body = response.content.decode()
  114. obj = json.loads(body)
  115. has_more = True if obj.get('has_more', 0) == 1 else False
  116. next_cursor = str(obj.get('max_cursor')) if has_more else None
  117. data = obj.get('aweme_list', [])
  118. response.close()
  119. if response.status_code != 200:
  120. if "-" in mark:
  121. mark = mark.split("-")[0]
  122. Common.logger("douyin").info(
  123. f"接口请求失败,请更换cookie,{response.status_code}")
  124. Feishu.bot('recommend', '抖音', f'抖音cookie失效,请及时更换~', mark, mark_name)
  125. # 如果返回空信息,则随机睡眠 600, 1200 秒
  126. time.sleep(random.randint(600, 1200))
  127. continue
  128. elif len(data) == 0:
  129. Common.logger("douyin").info(
  130. f"接口请求失败,请更换cookie")
  131. continue
  132. for j in range(len(data)):
  133. try:
  134. entity_type = data[j].get('search_impr').get('entity_type')
  135. Common.logger("douyin").info(
  136. f"非视频:{entity_type}")
  137. if entity_type == 'GENERAL':
  138. video_id = data[j].get('aweme_id') # 文章id
  139. id = cls.select_videoUrl_id(video_id, mark)
  140. if id:
  141. count += 1
  142. if count > 5:
  143. Common.logger("douyin").info(
  144. f"重复视频不在抓取该用户,用户主页id:{account_id}")
  145. exit_flag = True
  146. break
  147. continue
  148. video_url = data[j].get('video').get('play_addr').get('url_list')[0] # 视频链接
  149. channel_name = mark+'/douyin'
  150. oss_object_key = Oss.video_sync_upload_oss(video_url, video_id, account_id, channel_name)
  151. status = oss_object_key.get("status")
  152. # 发送 oss
  153. oss_object_key = oss_object_key.get("oss_object_key")
  154. Common.logger("douyin").info(f"抖音视频链接oss发送成功,oss地址:{oss_object_key}")
  155. if status == 200:
  156. cls.insert_videoUrl(video_id, account_id, oss_object_key, mark)
  157. Common.logger("douyin").info(
  158. f"视频地址插入数据库成功,视频id:{video_id},用户主页id:{account_id},视频储存地址:{oss_object_key}")
  159. # 发送成功 存入数据库
  160. except Exception as e:
  161. Common.logger("douyin").warning(f"抓取单条视频异常:{e}\n")
  162. continue
  163. except Exception as e:
  164. Common.logger("douyin").warning(f"抓取异常:{e}\n")
  165. return
  166. if __name__ == '__main__':
  167. douyinAuthor.get_videoList()