# channel_automation_provide_job_download.py (1.7 KB)
  1. import json
  2. file = "/Users/zhao/Downloads/da94a214-ddbb-491f-8984-9505b30d43cb.json"
  3. log_json_list = []
  4. with open(file) as f:
  5. line = f.readline()
  6. while line:
  7. log_json_list.append(json.loads(line))
  8. line = f.readline()
  9. print(f"videoId,品类,视频截帧,爬取计划ID,站外视频ID,站外账号ID,结果,分享量,点赞量,分享量/点赞量,视频时长(秒),观众年龄50+占比,观众年龄50+TGI,过滤规则表达式")
  10. for log in log_json_list:
  11. video_id = log['videoId']
  12. crawler_plan_id = log['crawlerPlanId']
  13. ext_json = json.loads(log['ext'])
  14. merge_cate2_map = ext_json['mergeCate2Map']
  15. extra_frame_image_url = ext_json['extraFrameImageUrl']
  16. for channel_content_id in ext_json:
  17. if channel_content_id in ['mergeCate2Map', 'extraFrameImageUrl']:
  18. continue
  19. channel_ext_info = ext_json[channel_content_id]
  20. channel_account_id = channel_ext_info.get("aweme_info", "{}").get("author", "{}").get("sec_uid", "")
  21. result = channel_ext_info['contentDetail'].get('result', False)
  22. rule_str = channel_ext_info['rule']
  23. rule_context = channel_ext_info['ruleContext']
  24. share_cnt = rule_context['shareCnt']
  25. video_duration_s = rule_context['videoDuration_s']
  26. like_cnt = rule_context['likeCnt']
  27. audience_age_50_rate = rule_context['audienceAge50Rate']
  28. audience_age_50_tgi = rule_context['audienceAge50TGI']
  29. share_div_link = rule_context['shareDivLink']
  30. print(f"{video_id},{merge_cate2_map},{extra_frame_image_url},'{crawler_plan_id},'{channel_content_id},{channel_account_id},{result},"
  31. f"{share_cnt},{like_cnt},{share_div_link},{video_duration_s},{audience_age_50_rate},{audience_age_50_tgi},{rule_str}")