1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- import json
- from common.mq import MQ
- from tqdm import tqdm
# Maps the camelCase field names found in a serialized ``CrawlerEtlParam``
# message to the snake_case keys used in the payload that is re-sent.
# Field names not listed here are kept unchanged.
KEY_MAP = {
    "crawlerRule": "crawler_rule",
    "userId": "user_id",
    "outUserId": "out_user_id",
    "userName": "user_name",
    "avatarUrl": "avatar_url",
    "outVideoId": "out_video_id",
    "videoTitle": "video_title",
    "coverUrl": "cover_url",
    "videoUrl": "video_url",
    "publishTime": "publish_time",
    "playCnt": "play_cnt",
    "likeCnt": "like_cnt",
    "shareCnt": "share_cnt",
    "collectionCnt": "collection_cnt",
    "commentCnt": "comment_cnt",
    "strategyType": "strategy",
}

# The last four ", "-separated fragments of a message are never copied into
# the payload.
TRAILING_FRAGMENTS_SKIPPED = 4


def build_video_dict(msg, platform, strategy):
    """Turn a ``CrawlerEtlParam(...)`` string into the payload dict to re-send.

    The text between the outer brackets is split on ``", "``; every
    ``key=value`` fragment (except the last four fragments) becomes one entry,
    with the key renamed through ``KEY_MAP``. Apostrophes are stripped from
    all fragments, and a value that is exactly ``null`` becomes ``""``.

    NOTE(review): a value that itself contains ``", "`` is cut at the first
    occurrence, because the message format offers no escaping to split on.

    Args:
        msg: The serialized message, e.g. ``"CrawlerEtlParam(userId=1, ...)"``.
        platform: Stored under the ``platform`` key.
        strategy: Stored under the ``strategy`` key; overrides any
            ``strategyType`` parsed from ``msg``.

    Returns:
        A dict of string fields plus ``strategy``, ``platform`` and a
        ``crawler_rule`` that is always the JSON text ``"{}"``.
    """
    # Drop the class name, then the surrounding bracket pair.
    body = msg.replace("CrawlerEtlParam", "")[1:-1]
    fragments = [part.replace("'", "") for part in body.split(", ")]

    video_dict = {}
    for fragment in fragments[:-TRAILING_FRAGMENTS_SKIPPED]:
        # Split on the first "=" only, so values such as URLs with query
        # strings keep their own "=" characters.
        key, separator, value = fragment.partition("=")
        if not separator:
            continue
        # Only a field whose whole value is "null" is blanked; "null" inside
        # a longer value (e.g. a title) is left untouched.
        video_dict[KEY_MAP.get(key, key)] = "" if value == "null" else value

    video_dict["strategy"] = strategy
    video_dict["platform"] = platform
    video_dict["crawler_rule"] = json.dumps({})
    return video_dict


def parse_resend_line(line):
    """Parse one line of the resend file into a payload dict.

    Each non-blank line is a JSON object with ``msg``, ``platform`` and
    ``strategy`` keys.

    Args:
        line: One raw line from the file, with or without its newline.

    Returns:
        The payload dict, or ``None`` when the line is blank.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If one of the three required keys is missing.
    """
    if not line.strip():
        return None
    record = json.loads(line)
    return build_video_dict(record["msg"], record["platform"], record["strategy"])


def main(path="resend.txt"):
    """Re-send every record in ``path`` to the ``topic_crawler_etl_prod`` topic."""
    with open(path, "r", encoding="utf-8") as f:
        datas = f.readlines()

    # The topic never changes, so one producer is shared by all messages.
    # NOTE(review): assumes an MQ instance can send more than one message.
    mq = MQ(topic_name="topic_crawler_etl_" + "prod")
    for line in tqdm(datas):
        video_dict = parse_resend_line(line)
        if video_dict is None:
            continue
        mq.send_msg(video_dict)


if __name__ == "__main__":
    main()
|