# channel_automation_provide_job_download.py

  1. import json
  2. file = "/Users/zhao/Downloads/0fa4225d-ab4f-46cb-ab56-87ac0d96416f.json"
  3. log_json_list = []
  4. with open(file) as f:
  5. line = f.readline()
  6. while line:
  7. log_json_list.append(json.loads(line))
  8. line = f.readline()
  9. print(f"crawlerMode,videoId,品类,视频截帧,爬取计划ID,站外视频ID,站外账号ID,结果,原因,分享量,点赞量,分享量/点赞量,视频时长(秒),观众年龄50+占比,观众年龄50+TGI,过滤规则表达式")
  10. for log in log_json_list:
  11. merge_cate2 = log.get("mergeSecondLevelCate", "")
  12. crawler_mode = log.get('crawlerMode', '')
  13. video_id = log['videoId']
  14. crawler_plan_id = log.get('crawlerPlanId', '')
  15. result = log.get('result', False)
  16. reason = log.get('reason', '成功')
  17. if 'ext' not in log:
  18. print(f"{crawler_mode},{video_id},{merge_cate2},,'{crawler_plan_id},,,{result},{reason}")
  19. continue
  20. ext_json = json.loads(log.get('ext', "{}"))
  21. extra_frame_image_url = ext_json.get('extraFrameImageUrl', '')
  22. for channel_content_id in ext_json:
  23. if channel_content_id in ['mergeCate2Map', 'extraFrameImageUrl']:
  24. continue
  25. channel_ext_info = ext_json[channel_content_id]
  26. if 'ruleContext' not in channel_ext_info:
  27. continue
  28. result = channel_ext_info.get('result', False)
  29. rule_str = channel_ext_info.get('rule', "")
  30. rule_context = channel_ext_info.get('ruleContext', ())
  31. content_detail = channel_ext_info.get('contentDetail', ())
  32. channel_account_id = content_detail.get('channelAccountId', '')
  33. share_cnt = rule_context.get('shareCnt', 0)
  34. video_duration_s = rule_context.get('videoDuration_s', 0)
  35. like_cnt = rule_context.get('likeCnt', 0)
  36. audience_age_50_rate = rule_context.get('audienceAge50Rate', 0)
  37. audience_age_50_tgi = rule_context.get('audienceAge50TGI', 0)
  38. share_div_link = rule_context.get('shareDivLink', 0)
  39. print(f"{crawler_mode},{video_id},{merge_cate2},{extra_frame_image_url},'{crawler_plan_id},'{channel_content_id},{channel_account_id},{result},{reason},"
  40. f"{share_cnt},{like_cnt},{share_div_link},{video_duration_s},{audience_age_50_rate},{audience_age_50_tgi},{rule_str}")