# resend_msg.py
import json

from tqdm import tqdm

from common.mq import MQ
  4. if __name__ == "__main__":
  5. path = 'resend.txt'
  6. with open(path, "r", encoding="utf-8") as f:
  7. datas = f.readlines()
  8. for line in tqdm(datas):
  9. video_dict = {}
  10. msg = json.loads(line)['msg']
  11. # print(msg)
  12. platform = json.loads(line)['platform']
  13. strategy = json.loads(line)['strategy']
  14. strs = msg.replace("CrawlerEtlParam", "")[1: -1]
  15. str_list = strs.split(", ")
  16. str_list = [i.replace("'", "") for i in str_list]
  17. key_dict = {
  18. "crawlerRule": "crawler_rule",
  19. "userId": "user_id",
  20. "outUserId": "out_user_id",
  21. "userName": "user_name",
  22. "avatarUrl": "avatar_url",
  23. "outVideoId": "out_video_id",
  24. "videoTitle": "video_title",
  25. "coverUrl": "cover_url",
  26. "videoUrl": "video_url",
  27. "publishTime": "publish_time",
  28. "playCnt": "play_cnt",
  29. "likeCnt": "like_cnt",
  30. "shareCnt": "share_cnt",
  31. "collectionCnt": "collection_cnt",
  32. "commentCnt": "comment_cnt",
  33. "strategyType": "strategy"
  34. }
  35. for index, i in enumerate(str_list[:-4]):
  36. if "=" not in i:
  37. continue
  38. else:
  39. key = i.split("=")[0]
  40. value = i[len(key) + 1:]
  41. new_key = key_dict.get(key, key)
  42. video_dict[new_key] = value.replace("null", "")
  43. video_dict['strategy'] = strategy
  44. video_dict['platform'] = platform
  45. video_dict['crawler_rule'] = json.dumps({})
  46. # print(json.dumps(video_dict, ensure_ascii=False, indent=4))
  47. mq = MQ(topic_name="topic_crawler_etl_" + "prod")
  48. mq.send_msg(video_dict)