123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- import random
- import time
- import requests
- import json
- from common import Common
- from common.sql_help import sqlCollect
class DYLS:
    """Collect qualifying Douyin videos from a blogger's history.

    Both public methods talk to an HTTP crawler service:
    ``get_dyls_list`` pages through a blogger's videos and filters them,
    ``get_video`` resolves one video id to its video URL and cover image.
    """

    # Crawler service endpoints.
    BLOGGER_URL = "http://8.217.190.241:8888/crawler/dou_yin/blogger"
    DETAIL_URL = "http://8.217.190.241:8888/crawler/dou_yin/detail"
    JSON_HEADERS = {'Content-Type': 'application/json'}

    # Upper bound on the number of result pages fetched per call.
    MAX_PAGES = 50
    # Seconds to wait on the crawler before giving up; without a timeout a
    # stalled service would block the caller forever. Tune as needed.
    REQUEST_TIMEOUT = 60

    # Filter thresholds applied to every candidate video.
    MIN_SHARE_COUNT = 500
    MIN_SHARE_RATIO = 0.25
    MIN_DURATION_SECONDS = 30
    MAX_DURATION_SECONDS = 720

    @staticmethod
    def _share_ratio(share_count, good_count):
        """Return shares / likes formatted with two decimals.

        A video with zero likes yields ``"0.00"`` (so it fails the ratio
        threshold) instead of raising ``ZeroDivisionError``.
        """
        likes = int(good_count)
        if likes == 0:
            return "0.00"
        return '%.2f' % (int(share_count) / likes)

    @classmethod
    def _meets_rule(cls, share_count, video_percent, duration):
        """Return True when shares, share ratio and duration (seconds) pass."""
        if int(share_count) < cls.MIN_SHARE_COUNT:
            return False
        # The ratio is compared after two-decimal rounding, as the stored
        # "rule" value is that rounded string.
        if float(video_percent) < cls.MIN_SHARE_RATIO:
            return False
        seconds = int(duration)
        return cls.MIN_DURATION_SECONDS <= seconds <= cls.MAX_DURATION_SECONDS

    @classmethod
    def get_dyls_list(cls, task_mark, url_id, number, mark):
        """Return up to ``number`` qualifying videos for blogger ``url_id``.

        Pages through the blogger endpoint (at most ``MAX_PAGES`` pages,
        sleeping 1-5 seconds before each request). A video is kept when
        ``sqlCollect.is_used`` does not return False for it, it passes the
        share / ratio / duration rule, and ``get_video`` yields a video URL.

        Args:
            task_mark: Task identifier, passed to ``sqlCollect.is_used`` and logs.
            url_id: Blogger account id sent to the crawler as ``account_id``.
            number: How many videos to collect; converted with ``int``.
            mark: Passed through to ``sqlCollect.is_used``.

        Returns:
            A list of dicts with keys ``video_id``, ``cover``, ``video_url``,
            ``rule`` and ``old_title``. The list accumulates across pages and
            is returned as soon as it holds ``number`` entries, when the
            service reports no more pages, when processing a page fails, or
            when the page cap is reached.
        """
        videos = []
        next_cursor = ""
        for _ in range(cls.MAX_PAGES):
            payload = json.dumps({
                "account_id": url_id,
                "source": "抖查查",
                "cursor": next_cursor,
            })
            # Random pause before every page request.
            time.sleep(random.randint(1, 5))
            response = requests.request(
                "POST",
                cls.BLOGGER_URL,
                headers=cls.JSON_HEADERS,
                data=payload,
                timeout=cls.REQUEST_TIMEOUT,
            )
            page = response.json()["data"]
            has_more = page["has_more"]
            next_cursor = str(page["next_cursor"])
            try:
                for data in page["data"]:
                    share_count = data["share_count"]
                    good_count = data["good_count"]
                    duration = data["duration"]
                    video_id = data["video_id"]
                    old_title = data["video_desc"]

                    status = sqlCollect.is_used(task_mark, video_id, mark, "抖音")
                    if status:
                        status = sqlCollect.is_used(task_mark, video_id, mark, "抖音历史")
                    # NOTE(review): indentation was lost in the reviewed copy;
                    # this assumes the skip applies when either lookup returns
                    # False - confirm against the original file. The explicit
                    # comparison is kept so a None result is not skipped.
                    if status == False:  # noqa: E712
                        continue

                    video_percent = cls._share_ratio(share_count, good_count)
                    # The division implies the service reports milliseconds.
                    duration = duration / 1000
                    if not cls._meets_rule(share_count, video_percent, duration):
                        Common.logger("dy-ls").info(
                            f"不符合规则:{task_mark},用户主页id:{url_id},视频id{video_id} ,分享:{share_count},点赞{good_count} ,时长:{int(duration)} ")
                        continue

                    video_url, image_url = cls.get_video(video_id)
                    if video_url:
                        videos.append({
                            "video_id": video_id,
                            "cover": image_url,
                            "video_url": video_url,
                            "rule": video_percent,
                            "old_title": old_title,
                        })
                        if len(videos) == int(number):
                            Common.logger("dy-ls").info(f"获取抖音历史视频总数:{len(videos)}\n")
                            return videos
                if has_more == False:  # noqa: E712
                    return videos
            except Exception as exc:
                # Best effort: log the failure and hand back what was collected.
                Common.logger("dy-ls").info(f"抖音历史数据获取失败:{exc}\n")
                return videos
        # Page cap reached: return what was gathered instead of None.
        return videos

    @classmethod
    def get_video(cls, video_id):
        """Fetch the video URL and cover image URL for ``video_id``.

        Args:
            video_id: Video id; sent to the detail endpoint as a string.

        Returns:
            A ``(video_url, image_url)`` tuple taken from the first entries of
            ``video_url_list`` and ``image_url_list`` in the response.

        Raises:
            KeyError, IndexError, TypeError: If the response lacks the
                expected fields (they are indexed directly).
        """
        payload = json.dumps({"content_id": str(video_id)})
        response = requests.request(
            "POST",
            cls.DETAIL_URL,
            headers=cls.JSON_HEADERS,
            data=payload,
            timeout=cls.REQUEST_TIMEOUT,
        )
        detail = response.json()["data"]["data"]
        video_url = detail["video_url_list"][0]["video_url"]
        image_url = detail["image_url_list"][0]["image_url"]
        return video_url, image_url
if __name__ == '__main__':
    # Manual smoke run: request a single video from one blogger's history.
    # To probe the detail endpoint on its own, call DYLS.get_video with a
    # video id such as "7314923922602954022".
    DYLS.get_dyls_list(
        task_mark="1",
        url_id="MS4wLjABAAAA2QEvnEb7cQDAg6vZXq3j8_LlbO_DiturnV7VeybFKY4",
        number=1,
        mark="1",
    )
|