douyin_author.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. # -*- coding: utf-8 -*-
  2. # @Time: 2023/12/22
  3. import datetime
  4. import os
  5. import random
  6. import sys
  7. import time
  8. from datetime import datetime
  9. import requests
  10. import json
  11. import urllib3
  12. from extract_data.douyin.douyin_author_help import DouYinHelper
  13. sys.path.append(os.getcwd())
  14. from common.aliyun_oss_uploading import Oss
  15. from common.common import Common
  16. from common.material import Material
  17. from common.feishu import Feishu
  18. from common.db import MysqlHelper
  19. from requests.adapters import HTTPAdapter
  20. class douyinAuthor():
  21. """
  22. 获取抖音用户主页id
  23. """
  24. @classmethod
  25. def get_videoUserId(cls, mark):
  26. select_user_sql = f"""select user_id, channel from agc_channel_data where mark = '{mark}' and channel = '抖音' ORDER BY id DESC;"""
  27. user_list = MysqlHelper.get_values(select_user_sql, "prod")
  28. return user_list
  29. """
  30. oss视频地址 存入数据库
  31. """
  32. @classmethod
  33. def insert_videoUrl(cls, video_id, account_id, oss_object_key, mark):
  34. current_time = datetime.now()
  35. formatted_time = current_time.strftime("%Y-%m-%d %H:%M")
  36. insert_sql = f"""INSERT INTO agc_video_url (video_id, account_id, oss_object_key, time , status, mark) values ("{video_id}", "{account_id}", "{oss_object_key}", "{formatted_time}", 1, "{mark}")"""
  37. MysqlHelper.update_values(
  38. sql=insert_sql,
  39. env="prod",
  40. machine="",
  41. )
  42. """
  43. 查询该video_id是否在数据库存在
  44. """
  45. @classmethod
  46. def select_videoUrl_id(cls, video_id, mark):
  47. select_user_sql = f"""select video_id from agc_video_url where video_id='{video_id}' and mark = '{mark}';"""
  48. user_list = MysqlHelper.get_values(select_user_sql, "prod")
  49. if user_list:
  50. return True
  51. else:
  52. return False
  53. """抖音读取数据 将数据存储到oss上"""
  54. @classmethod
  55. def get_videoList(cls, data):
  56. try:
  57. mark = data['mark']
  58. mark_name = data['mark_name']
  59. token = data['token']
  60. feishu_id = data['feishu_id']
  61. channel_id = data['channel'][0]
  62. channel = data['channel'][1]
  63. user_list = Material.insert_user(feishu_id, channel_id, mark, channel)
  64. cookie = Material.get_cookie(feishu_id, token, channel)
  65. if len(user_list) == 0:
  66. return
  67. for account_id in user_list:
  68. Common.logger("douyin").info(f"用户主页ID:{account_id}")
  69. next_cursor = 0
  70. count = 0
  71. while True:
  72. if next_cursor == None:
  73. break
  74. if count > 5:
  75. continue
  76. time.sleep(random.randint(5, 10))
  77. url = 'https://www.douyin.com/aweme/v1/web/aweme/post/'
  78. headers = {
  79. 'Accept': 'application/json, text/plain, */*',
  80. 'Accept-Language': 'zh-CN,zh;q=0.9',
  81. 'Cache-Control': 'no-cache',
  82. 'Cookie': cookie,
  83. 'Pragma': 'no-cache',
  84. 'Referer': f'https://www.douyin.com/user/{account_id}',
  85. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
  86. 'Chrome/118.0.0.0 Safari/537.36',
  87. }
  88. query = DouYinHelper.get_full_query(ua=headers['User-Agent'], extra_data={
  89. 'sec_user_id': account_id,
  90. 'max_cursor': next_cursor,
  91. 'locate_query': 'false',
  92. 'show_live_replay_strategy': '1',
  93. 'need_time_list': '1',
  94. 'time_list_query': '0',
  95. 'whale_cut_token': '',
  96. 'cut_version': '1',
  97. 'count': '18',
  98. 'publish_video_strategy_type': '2',
  99. })
  100. urllib3.disable_warnings()
  101. s = requests.session()
  102. s.mount('http://', HTTPAdapter(max_retries=3))
  103. s.mount('https://', HTTPAdapter(max_retries=3))
  104. response = requests.request(method='GET', url=url, headers=headers, params=query)
  105. text = response.text
  106. if len(text) == 0:
  107. time.sleep(60)
  108. continue
  109. body = response.content.decode()
  110. obj = json.loads(body)
  111. has_more = True if obj.get('has_more', 0) == 1 else False
  112. next_cursor = str(obj.get('max_cursor')) if has_more else None
  113. data = obj.get('aweme_list', [])
  114. response.close()
  115. if response.status_code != 200:
  116. Common.logger("douyin").info(
  117. f"接口请求失败,请更换cookie,{response.status_code}")
  118. Feishu.bot('recommend', '抖音', f'抖音cookie失效,请及时更换~', mark, mark_name)
  119. # 如果返回空信息,则随机睡眠 600, 1200 秒
  120. time.sleep(random.randint(600, 1200))
  121. continue
  122. elif len(data) == 0:
  123. Common.logger("douyin").info(
  124. f"接口请求失败,请更换cookie")
  125. continue
  126. for j in range(len(data)):
  127. try:
  128. entity_type = data[j].get('search_impr').get('entity_type')
  129. Common.logger("douyin").info(
  130. f"非视频:{entity_type}")
  131. if entity_type == 'GENERAL':
  132. video_id = data[j].get('aweme_id') # 文章id
  133. id = cls.select_videoUrl_id(video_id, mark)
  134. if id:
  135. count += 1
  136. if count > 5:
  137. Common.logger("douyin").info(
  138. f"重复视频不在抓取该用户,用户主页id:{account_id}")
  139. return
  140. continue
  141. video_url = data[j].get('video').get('play_addr').get('url_list')[0] # 视频链接
  142. channel_name = mark+'/douyin'
  143. oss_object_key = Oss.video_sync_upload_oss(video_url, video_id, account_id, channel_name)
  144. status = oss_object_key.get("status")
  145. # 发送 oss
  146. oss_object_key = oss_object_key.get("oss_object_key")
  147. Common.logger("douyin").info(f"抖音视频链接oss发送成功,oss地址:{oss_object_key}")
  148. if status == 200:
  149. cls.insert_videoUrl(video_id, account_id, oss_object_key, mark)
  150. Common.logger("douyin").info(
  151. f"视频地址插入数据库成功,视频id:{video_id},用户主页id:{account_id},视频储存地址:{oss_object_key}")
  152. # 发送成功 存入数据库
  153. except Exception as e:
  154. Common.logger("douyin").warning(f"抓取单条视频异常:{e}\n")
  155. continue
  156. except Exception as e:
  157. Common.logger("douyin").warning(f"抓取异常:{e}\n")
  158. return
  159. if __name__ == '__main__':
  160. douyinAuthor.get_videoList()