# douyin.py

import json
import random
import time

import requests
import urllib3
from requests.adapters import HTTPAdapter

from common import Material, Feishu, AliyunLogger
from common.sql_help import sqlCollect
from data_channel.douyin_help import DouYinHelper
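

# Scrapes a Douyin (抖音) user's post feed page by page, filters videos with
# share/like heuristics, and returns candidates for automated repurposing.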
class DY:
    @classmethod
    def get_dy_url(cls, task_mark, url_id, number, mark, feishu_id, cookie_sheet, channel_id, name):
        video_list = []  # renamed from `list` to avoid shadowing the builtin
        next_cursor = 0
        try:
            for i in range(6):  # fetch at most 6 pages per account
                cookie = Material.get_cookie_data(feishu_id, cookie_sheet, channel_id)
                time.sleep(random.randint(1, 5))  # random delay between requests
                url = 'https://www.douyin.com/aweme/v1/web/aweme/post/'
                headers = {
                    'Accept': 'application/json, text/plain, */*',
                    'Accept-Language': 'zh-CN,zh;q=0.9',
                    'Cache-Control': 'no-cache',
                    'Cookie': cookie,
                    'Pragma': 'no-cache',
                    'Referer': f'https://www.douyin.com/user/{url_id}',
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
                                  'Chrome/118.0.0.0 Safari/537.36',
                }
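                # DouYinHelper.get_full_query presumably augments these fields with
                # the signed/device query parameters the Douyin web API requires.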
                query = DouYinHelper.get_full_query(ua=headers['User-Agent'], extra_data={
                    'sec_user_id': url_id,
                    'max_cursor': next_cursor,  # pagination cursor from the previous page
                    'locate_query': 'false',
                    'show_live_replay_strategy': '1',
                    'need_time_list': '1',
                    'time_list_query': '0',
                    'whale_cut_token': '',
                    'cut_version': '1',
                    'count': '18',  # page size
                    'publish_video_strategy_type': '2',
                })
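                # disable_warnings() silences urllib3's HTTP warnings (e.g. the
                # InsecureRequestWarning emitted for unverified TLS requests).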
                urllib3.disable_warnings()
                s = requests.session()
                s.mount('http://', HTTPAdapter(max_retries=3))
                s.mount('https://', HTTPAdapter(max_retries=3))
                # Send the request through the session so the retry adapters apply;
                # the original called requests.request() directly, bypassing them.
                response = s.request(method='GET', url=url, headers=headers, params=query, timeout=30)
                body = response.content.decode()
                obj = json.loads(body)
                has_more = obj.get('has_more', 0) == 1
                next_cursor = str(obj.get('max_cursor')) if has_more else None
                data = obj.get('aweme_list', [])
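                # An empty aweme_list is treated as an expired cookie: notify the
                # owners via Feishu ("Douyin cookie expired, please replace it
                # promptly") and return what has been collected so far.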
                if not data:
                    if name in ('抖音品类账号', '抖音品类账号-1', 'Top溯源账号'):
                        # Feishu.bot("wangxueke", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '王雪珂')
                        Feishu.bot("xinxin", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '信欣')
                        Feishu.bot("liuzhaoheng", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '刘兆恒')
                    else:
                        Feishu.bot(mark, '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', name)
                    return video_list
                response.close()
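                # media_type == 4 appears to denote a regular video post; other
                # media types (image posts, live replays) are skipped.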
                for item in data:  # no longer reuses `i`, which shadowed the page counter
                    entity_type = item.get('media_type')
                    if entity_type == 4:
                        # is_top = item.get('is_top')  # whether the post is pinned
                        video_id = item.get('aweme_id')  # post id
                        day_count = Material.get_count_restrict(channel_id)
                        if day_count:
                            status = sqlCollect.is_used_days(video_id, mark, channel_id, day_count)
                        else:
                            status = sqlCollect.is_used(video_id, mark, channel_id)
                        video_uri = item.get('video', {}).get('play_addr', {}).get('uri')
                        ratio = f'{item.get("video", {}).get("height")}p'
                        video_url = item.get('video', {}).get('play_addr', {}).get('url_list', [None])[0]
                        # video_url = f'https://www.iesdouyin.com/aweme/v1/play/?video_id={video_uri}&ratio={ratio}&line=0'  # video link
                        digg_count = int(item.get('statistics').get('digg_count'))  # like count
                        # comment_count = int(item.get('statistics').get('comment_count'))  # comment count
                        share_count = int(item.get('statistics').get('share_count'))  # share count
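                        # Sanitize the title: strip newlines, filesystem-unsafe
                        # characters (/ \ : * ? " < > |), spaces, and a few other
                        # artifacts so it can safely be used as a filename later.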
                        old_title = item.get('desc', "").strip().replace("\n", "") \
                            .replace("/", "").replace("\\", "").replace("\r", "") \
                            .replace(":", "").replace("*", "").replace("?", "") \
                            .replace("?", "").replace('"', "").replace("<", "") \
                            .replace(">", "").replace("|", "").replace(" ", "") \
                            .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
                            .replace("'", "").replace("#", "").replace("Merge", "")
                        # duration is reported in milliseconds; default to 0 rather
                        # than {} (the original default would crash int()).
                        duration = int(item.get('video', {}).get('duration', 0)) // 1000
                        # duration = dataHelp.video_duration(video_url)
                        log_data = f"user:{url_id},,video_id:{video_id},,video_url:{video_url},,original_title:{old_title},,share_count:{share_count},,digg_count:{digg_count},,duration:{duration}"
                        AliyunLogger.logging(channel_id, name, url_id, video_id, "扫描到一条视频", "2001", log_data)  # "scanned one video"
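                        # Filtering rules: skip videos already processed, videos with
                        # fewer than 200 shares, a share/like ratio below 0.15, or a
                        # duration outside 30-720 seconds.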
                        if status:
                            AliyunLogger.logging(channel_id, name, url_id, video_id, "该视频已改造过", "2002", log_data)  # already repurposed
                            continue
                        if share_count < 200:
                            AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享小于200", "2003", log_data)  # shares < 200
                            continue
                        # Guard against division by zero when a video has no likes.
                        video_percent = '%.2f' % (share_count / digg_count) if digg_count else '0.00'
                        special = 0.15
                        if float(video_percent) < special:
                            AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:分享/点赞小于0.15", "2003", log_data)  # share/like ratio < 0.15
                            continue
                        if duration < 30 or duration > 720:
                            AliyunLogger.logging(channel_id, name, url_id, video_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)  # duration outside 30-720s
                            continue
                        cover_url = item.get('video').get('cover').get('url_list')[0]  # video cover
                        all_data = {"video_id": video_id, "cover": cover_url, "video_url": video_url,
                                    "rule": video_percent, "old_title": old_title}
                        video_list.append(all_data)
                        AliyunLogger.logging(channel_id, name, url_id, video_id, "符合规则等待改造", "2004", log_data)  # passed all rules, queued for repurposing
                        if len(video_list) == int(number):
                            return video_list
                if not has_more:
                    return video_list  # no further pages; stop instead of re-requesting with a None cursor
            return video_list
        except Exception as exc:
            # All exceptions (network errors, JSON decode failures, missing keys)
            # are swallowed; return whatever has been collected so far.
            # Feishu.bot("liuzhaoheng", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '刘兆恒')
            # Feishu.bot("wangxueke", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '王雪珂')
            # Feishu.bot("xinxin", '机器自动改造消息通知', f'抖音-{name}cookie过期,请及时更换', '信欣')
            return video_list
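

if __name__ == '__main__':
    # Minimal local sketch, not part of the pipeline: every argument below is a
    # placeholder (the sec_user_id, Feishu ids, sheet name, and channel id are
    # assumptions), and a valid cookie in the Feishu sheet is needed for real output.
    videos = DY.get_dy_url(task_mark='test_task', url_id='MS4wLjABAAAA_example',
                           number=5, mark='test_mark', feishu_id='feishu_doc_id',
                           cookie_sheet='cookie_sheet', channel_id='抖音',
                           name='测试账号')
    print(f'{len(videos)} candidate video(s) collected')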