1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 |
import pymysql
import json
from tqdm import tqdm

# Single shared MySQL connection reused by all helpers in this script.
# NOTE(security): credentials are hard-coded in source — move them to
# environment variables or a config file before this is shared further.
spider_connection = pymysql.connect(
    host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
    port=3306,
    user="crawler",
    passwd="crawler123456@",
    db="piaoquan-crawler",
    charset="utf8mb4"
)
def find_videos(trace_id):
    """
    Look up the three recalled video ids recorded for a published article.

    :param trace_id: trace identifier of the article to look up
    :return: tuple of (recall_video_id1, recall_video_id2, recall_video_id3)
             from the first matching row
    :raises IndexError: if no row exists for trace_id (original behavior kept)
    """
    # Parameterized query instead of f-string interpolation — the original
    # inlined trace_id directly into the SQL text (SQL-injection prone).
    select_sql = """
        SELECT recall_video_id1, recall_video_id2, recall_video_id3
        FROM long_articles_video
        WHERE trace_id = %s;
    """
    cursor = spider_connection.cursor()
    try:
        cursor.execute(select_sql, (trace_id,))
        data = cursor.fetchall()
        return data[0]
    finally:
        # Close the cursor even on failure; the original leaked it.
        cursor.close()
def insert_into_queue(video_id, publish_time_stamp, trace_id):
    """
    Queue a published video for the take-down workflow.

    :param video_id: id of the video to enqueue
    :param publish_time_stamp: publish time as a Unix timestamp (seconds)
    :param trace_id: trace identifier linking back to the source article
    :return: None (commits the insert on the shared connection)
    """
    update_sql = """
        INSERT INTO get_off_videos
        (video_id, publish_time, video_status, trace_id)
        VALUES
        (%s, %s, %s, %s);
    """
    cursor = spider_connection.cursor()
    try:
        # Bug fix: the original passed the module-global `time_stamp` here
        # instead of the `publish_time_stamp` parameter, so the function only
        # worked when the caller happened to define that global.
        cursor.execute(
            update_sql,
            (video_id, publish_time_stamp, 1, trace_id)
        )
        spider_connection.commit()
    finally:
        cursor.close()
if __name__ == '__main__':
    # Each line of tt_id.txt is "<trace_id>\t<publish_time_ms>".
    with open("tt_id.txt", encoding="utf-8") as f:
        data_line = f.readlines()
    for line in tqdm(data_line):
        line_ = line.split("\t")
        trace_id = line_[0]
        # Milliseconds → seconds epoch timestamp (floor division, same result
        # as the original int(int(x) / 1000) for non-negative timestamps).
        time_stamp = int(line_[1].strip()) // 1000
        vid_tuple = find_videos(trace_id)
        for video_id in vid_tuple:
            try:
                insert_into_queue(video_id, time_stamp, trace_id)
            except Exception as e:
                # Best-effort: log and keep processing remaining videos/lines.
                print(e)
|