- """
- @author: luojunhui
- """
import json
import time

import pymysql
from tqdm import tqdm
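
# MySQL connection to the piaoquan-crawler database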
spider_connection = pymysql.connect(
    host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
    port=3306,
    user="crawler",
    passwd="crawler123456@",
    db="piaoquan-crawler",
    charset="utf8mb4"
)
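
# id.txt holds one "old_id,new_id" pair per line; L maps new_id -> old_id
# and is currently only referenced by the commented-out check further down.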
- with open("id.txt", encoding="utf-8") as f:
- data = f.readlines()
- L = {}
- for line in data:
- new_id = line.split(",")[1].strip()
- old_id = line.split(",")[0]
- L[new_id] = old_id
- print(len(L))
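
# each record in update_video_trace_id.json carries a trace_id plus a
# JSON-encoded result_data list describing the published videos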
- with open("update_video_trace_id.json", encoding="utf-8") as f:
- publish_data = json.loads(f.read())
- print(len(publish_data))


def update_into_long_videos(trace_id, vid1, vid2, vid3):
    """
    Update the recall video ids for one trace_id
    :param trace_id:
    :param vid1:
    :param vid2:
    :param vid3:
    :return:
    """
    update_sql = """
        UPDATE long_articles_video
        SET recall_video_id1 = %s, recall_video_id2 = %s, recall_video_id3 = %s
        WHERE trace_id = %s;
    """
    cursor = spider_connection.cursor()
    cursor.execute(
        update_sql,
        (vid1, vid2, vid3, trace_id)
    )
    spider_connection.commit()
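

# walk every record, parse the three video ids out of each url-encoded
# productionPath, and write them back; ff counts records that could not
# be updated (e.g. result_data with fewer than three videos)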
ff = 0
for item in tqdm(publish_data):
    trace_id = item['trace_id']
    data_info = item['result_data']
    try:
        vid_list = []
        for video_obj in json.loads(data_info):
            path = video_obj['productionPath']
            # the video id sits between "id%3D" and "%26su" in the encoded path
            video_id = path.split("id%3D")[1].split("%26su")[0]
            # if L.get(video_id):
            vid_list.append(video_id)
        update_into_long_videos(
            trace_id=trace_id,
            vid1=vid_list[0],
            vid2=vid_list[1],
            vid3=vid_list[2]
        )
    except Exception as e:
        # count records that could not be updated
        ff += 1
        print("No videos {}".format(e))
print(ff)