# dy_ls.py

import json
import random
import time

import requests

from common import Common
from common.sql_help import sqlCollect
  7. class DYLS:
  8. @classmethod
  9. def get_dyls_list(cls, task_mark, url_id, number, mark):
  10. next_cursor = ""
  11. for i in range(50):
  12. # 抖查查
  13. url = "http://8.217.190.241:8888/crawler/dou_yin/blogger"
  14. payload = json.dumps({
  15. "account_id": url_id,
  16. "source": "抖查查",
  17. "cursor": next_cursor
  18. })
  19. headers = {
  20. 'Content-Type': 'application/json'
  21. }
  22. time.sleep(random.randint(1, 5))
  23. response = requests.request("POST", url, headers=headers, data=payload)
  24. response = response.json()
  25. list = []
  26. data_all_list = response["data"]
  27. has_more = data_all_list["has_more"]
  28. next_cursor = str(data_all_list["next_cursor"])
  29. try:
  30. data_list = data_all_list["data"]
  31. for data in data_list:
  32. # comment_count = data["comment_count"]
  33. # download_count = data["download_count"]
  34. share_count = data["share_count"]
  35. good_count = data["good_count"]
  36. # collect_count = data["collect_count"]
  37. duration = data["duration"]
  38. video_id = data["video_id"]
  39. old_title = data["video_desc"]
  40. status = sqlCollect.is_used(task_mark, video_id, mark, "抖音")
  41. if status:
  42. status = sqlCollect.is_used(task_mark, video_id, mark, "抖音历史")
  43. if status == False:
  44. continue
  45. video_percent = '%.2f' % (int(share_count) / int(good_count))
  46. special = float(0.25)
  47. duration = duration / 1000
  48. if int(share_count) < 500 or float(video_percent) < special or int(duration) < 30 or int(duration) > 720:
  49. Common.logger("dy-ls").info(
  50. f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{good_count} ,时长:{int(duration)} ")
  51. continue
  52. video_url, image_url = cls.get_video(video_id)
  53. if video_url:
  54. all_data = {"video_id": video_id, "cover": image_url, "video_url": video_url, "rule": video_percent,
  55. "old_title": old_title}
  56. list.append(all_data)
  57. if len(list) == int(number):
  58. Common.logger("dy-ls").info(f"获取抖音历史视频总数:{len(list)}\n")
  59. return list
  60. if has_more == False:
  61. return list
  62. except Exception as exc:
  63. Common.logger("dy-ls").info(f"抖音历史数据获取失败:{exc}\n")
  64. return list
  65. @classmethod
  66. def get_video(cls, video_id):
  67. url = "http://8.217.190.241:8888/crawler/dou_yin/detail"
  68. payload = json.dumps({
  69. "content_id": str(video_id)
  70. })
  71. headers = {
  72. 'Content-Type': 'application/json'
  73. }
  74. response = requests.request("POST", url, headers=headers, data=payload)
  75. response = response.json()
  76. data = response["data"]["data"]
  77. video_url = data["video_url_list"][0]["video_url"]
  78. image_url = data["image_url_list"][0]["image_url"]
  79. return video_url, image_url
  80. if __name__ == '__main__':
  81. # DYLS.get_video("7314923922602954022")
  82. DYLS.get_dyls_list("1","MS4wLjABAAAA2QEvnEb7cQDAg6vZXq3j8_LlbO_DiturnV7VeybFKY4",1,"1")