"""Re-send crawler ETL messages recorded in ``resend.txt``.

Every non-blank line of the input file is decoded as a JSON object with the
keys ``msg``, ``platform`` and ``strategy``. The ``msg`` text is split back
into ``key=value`` pairs, the keys are renamed to snake_case, and the
resulting dict is published through ``MQ.send_msg``.
"""
import json

from tqdm import tqdm

from common.mq import MQ

RESEND_PATH = 'resend.txt'
ETL_TOPIC = "topic_crawler_etl_" + "prod"

# Number of trailing ", "-separated segments of ``msg`` that are never parsed.
# NOTE(review): the reason for dropping exactly four is not visible in this
# file -- confirm against the producer of ``msg``.
_IGNORED_TRAILING_SEGMENTS = 4

# camelCase names found in ``msg`` -> snake_case names used in the payload.
# Keys that are not listed here are passed through unchanged.
_KEY_MAP = {
    "crawlerRule": "crawler_rule",
    "userId": "user_id",
    "outUserId": "out_user_id",
    "userName": "user_name",
    "avatarUrl": "avatar_url",
    "outVideoId": "out_video_id",
    "videoTitle": "video_title",
    "coverUrl": "cover_url",
    "videoUrl": "video_url",
    "publishTime": "publish_time",
    "playCnt": "play_cnt",
    "likeCnt": "like_cnt",
    "shareCnt": "share_cnt",
    "collectionCnt": "collection_cnt",
    "commentCnt": "comment_cnt",
    "strategyType": "strategy",
}


def _parse_etl_param(msg: str) -> dict:
    """Turn the ``msg`` text into a dict of renamed ``key=value`` fields.

    The literal ``CrawlerEtlParam`` is removed, the first and last characters
    are stripped (presumably the surrounding brackets -- TODO confirm), and
    the remainder is split on ``", "``. Single quotes are removed from every
    segment, and segments without ``=`` are ignored.
    """
    inner = msg.replace("CrawlerEtlParam", "")[1:-1]
    segments = [segment.replace("'", "") for segment in inner.split(", ")]

    fields = {}
    for segment in segments[:-_IGNORED_TRAILING_SEGMENTS]:
        # Split on the first "=" only, so values may themselves contain "=".
        key, separator, value = segment.partition("=")
        if not separator:
            # NOTE(review): a value containing ", " is cut by the split above
            # and its tail lands here and is dropped; kept as-is.
            continue
        # NOTE(review): this removes every occurrence of the substring
        # "null", not only a value that is exactly "null"; kept as-is.
        fields[_KEY_MAP.get(key, key)] = value.replace("null", "")
    return fields


def _build_video_dict(line: str) -> dict:
    """Decode one line of the resend file into the payload to publish.

    Raises:
        json.JSONDecodeError: if ``line`` is not valid JSON.
        KeyError: if ``msg``, ``strategy`` or ``platform`` is missing.
    """
    record = json.loads(line)  # decode once instead of once per field
    video_dict = _parse_etl_param(record['msg'])
    # The top-level values take precedence over anything parsed out of msg.
    video_dict['strategy'] = record['strategy']
    video_dict['platform'] = record['platform']
    video_dict['crawler_rule'] = json.dumps({})
    return video_dict


def main() -> None:
    """Publish one message per non-blank line of ``RESEND_PATH``."""
    with open(RESEND_PATH, "r", encoding="utf-8") as f:
        # Blank lines (e.g. a trailing newline) are skipped rather than
        # being handed to json.loads, which would raise on them.
        lines = [line for line in f if line.strip()]
    if not lines:
        return

    # One producer is built up front and reused for every message.
    mq = MQ(topic_name=ETL_TOPIC)
    for line in tqdm(lines):
        mq.send_msg(_build_video_dict(line))


if __name__ == "__main__":
    main()