# douyin.py

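"""Collect videos from a Douyin (抖音) user's profile feed.

DY.get_dy_url pages through up to three batches of the user's posts, checks
each video against the database via sqlCollect.is_used, and keeps only videos
with at least 500 shares, a share/like ratio of at least 0.25, and a duration
between 30 and 720 seconds. Qualifying videos are returned as dicts holding
the video id, cover image, playable URL, share/like ratio and sanitized title.
If the feed comes back empty, a Feishu alert is sent because the cookie is
assumed to have expired.
"""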
import json
import random
import time

import requests
import urllib3
from requests.adapters import HTTPAdapter

from common import Material, Common, Feishu
from common.sql_help import sqlCollect
from data_channel.data_help import dataHelp
from data_channel.douyin_help import DouYinHelper


class DY:
    @classmethod
    def get_dy_url(cls, task_mark, url_id, number, mark, feishu_id, cookie_sheet, channel_id, name):
        video_list = []
        next_cursor = 0
        for i in range(3):
            cookie = Material.get_cookie_data(feishu_id, cookie_sheet, channel_id)
            time.sleep(random.randint(1, 5))
            url = 'https://www.douyin.com/aweme/v1/web/aweme/post/'
            headers = {
                'Accept': 'application/json, text/plain, */*',
                'Accept-Language': 'zh-CN,zh;q=0.9',
                'Cache-Control': 'no-cache',
                'Cookie': cookie,
                'Pragma': 'no-cache',
                'Referer': f'https://www.douyin.com/user/{url_id}',
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
                              'Chrome/118.0.0.0 Safari/537.36',
            }
            query = DouYinHelper.get_full_query(ua=headers['User-Agent'], extra_data={
                'sec_user_id': url_id,
                'max_cursor': next_cursor,
                'locate_query': 'false',
                'show_live_replay_strategy': '1',
                'need_time_list': '1',
                'time_list_query': '0',
                'whale_cut_token': '',
                'cut_version': '1',
                'count': '18',
                'publish_video_strategy_type': '2',
            })
            urllib3.disable_warnings()
            s = requests.session()
            s.mount('http://', HTTPAdapter(max_retries=3))
            s.mount('https://', HTTPAdapter(max_retries=3))
            # Send the request through the session so the retry adapters take effect.
            response = s.request(method='GET', url=url, headers=headers, params=query)
            body = response.content.decode()
            response.close()
            obj = json.loads(body)
            has_more = obj.get('has_more', 0) == 1
            next_cursor = str(obj.get('max_cursor')) if has_more else None
            data = obj.get('aweme_list', [])
            if not data:
                # An empty post list is treated as an expired cookie.
                Feishu.bot(mark, '机器自动改造消息通知', '抖音cookie过期,请及时更换', name)
                return video_list
            for item in data:
                entity_type = item.get('media_type')
                if entity_type == 4:  # media_type 4 corresponds to a video post
                    # is_top = item.get('is_top')  # whether the post is pinned
                    video_id = item.get('aweme_id')  # post id
                    status = sqlCollect.is_used(video_id, mark, channel_id)
                    if status:
                        video_uri = item.get('video', {}).get('play_addr', {}).get('uri')
                        ratio = f'{item.get("video", {}).get("height")}p'
                        video_url = f'https://www.iesdouyin.com/aweme/v1/play/?video_id={video_uri}&ratio={ratio}&line=0'  # playable video URL
                        digg_count = int(item.get('statistics').get('digg_count'))  # likes
                        # comment_count = int(item.get('statistics').get('comment_count'))  # comments
                        share_count = int(item.get('statistics').get('share_count'))  # shares
                        # Strip line breaks and characters that are unsafe in file names from the title.
                        old_title = item.get('desc', "").strip().replace("\n", "") \
                            .replace("/", "").replace("\\", "").replace("\r", "") \
                            .replace(":", "").replace("*", "").replace("?", "") \
                            .replace("?", "").replace('"', "").replace("<", "") \
                            .replace(">", "").replace("|", "").replace(" ", "") \
                            .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
                            .replace("'", "").replace("#", "").replace("Merge", "")
                        Common.logger("dy").info(
                            f"扫描:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count}")
                        if share_count < 500:
                            Common.logger("dy").info(
                                f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count}")
                            continue
                        if digg_count == 0:
                            # Skip videos with no likes to avoid a ZeroDivisionError below.
                            continue
                        # Keep only videos whose share/like ratio is at least 0.25,
                        # e.g. 600 shares on 3000 likes gives 0.20 and is skipped.
                        video_percent = '%.2f' % (share_count / digg_count)
                        special = 0.25
                        if float(video_percent) < special:
                            Common.logger("dy").info(
                                f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ")
                            continue
                        # Keep only videos between 30 seconds and 12 minutes long.
                        duration = dataHelp.video_duration(video_url)
                        if int(duration) < 30 or int(duration) > 720:
                            Common.logger("dy").info(
                                f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{digg_count} ,时长:{duration} ")
                            continue
                        cover_url = item.get('video').get('cover').get('url_list')[0]  # video cover image
                        all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url,
                                    "rule": video_percent, "old_title": old_title}
                        video_list.append(all_data)
                        if len(video_list) == int(number):
                            Common.logger(mark).info(f"获取抖音视频总数:{len(video_list)}\n")
                            return video_list
        return video_list
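

# Example of how the class above might be invoked (a minimal sketch; every
# argument value here is hypothetical and would normally come from the
# project's Feishu task configuration):
#
#     videos = DY.get_dy_url(
#         task_mark='dy_task_demo',   # hypothetical task label
#         url_id='MS4wLjABAAAAxxxx',  # the target user's sec_user_id
#         number=5,                   # stop once 5 qualifying videos are found
#         mark='dy_demo',             # hypothetical job mark used for logging and dedup
#         feishu_id='xxxx',           # Feishu doc holding the cookies
#         cookie_sheet='xxxx',        # sheet name inside that doc
#         channel_id='xxxx',          # channel identifier passed to the cookie lookup
#         name='operator',            # recipient name for the Feishu alert
#     )
#     for v in videos:
#         print(v['video_id'], v['video_url'])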