""" @author: luojunhui """ import json import time import pymysql from tqdm import tqdm spider_connection = pymysql.connect( host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com", port=3306, user="crawler", passwd="crawler123456@", db="piaoquan-crawler", charset="utf8mb4" ) with open("id.txt", encoding="utf-8") as f: data = f.readlines() L = {} for line in data: new_id = line.split(",")[1].strip() old_id = line.split(",")[0] L[new_id] = old_id print(len(L)) with open("update_video_trace_id.json", encoding="utf-8") as f: publish_data = json.loads(f.read()) print(len(publish_data)) def update_into_long_videos(trace_id, vid1, vid2, vid3): """ 更新video_id :param trace_id: :param vid1: :param vid2: :param vid3: :return: """ update_sql = f""" UPDATE long_articles_video set recall_video_id1 = %s, recall_video_id2 = %s, recall_video_id3 = %s where trace_id = %s; """ cursor = spider_connection.cursor() cursor.execute( update_sql, (vid1, vid2, vid3, trace_id) ) spider_connection.commit() ff = 0 for item in tqdm(publish_data): trace_id = item['trace_id'] data_info = item['result_data'] try: vid_list = [] for video_obj in json.loads(data_info): path = video_obj['productionPath'] video_id = path.split("id%3D")[1].split("%26su")[0] # if L.get(video_id): vid_list.append(video_id) update_into_long_videos( trace_id=trace_id, vid1=vid_list[0], vid2=vid_list[1], vid3=vid_list[2] ) except Exception as e: print("No videos {}".format(e)) print(ff)