12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879 |
- """
- @author: luojunhui
- """
- import pymysql
- from concurrent.futures.thread import ThreadPoolExecutor
def get_data_list():
    """
    Fetch the rows to migrate from the ``long_articles_video`` table.

    Selects trace_id, content_id and the three recall_video_id columns of
    every row whose ``content_status`` equals 2.

    :return: the result of ``cursor.fetchall()``; each row is ordered as
        (trace_id, content_id, recall_video_id1, recall_video_id2,
        recall_video_id3)
    """
    # NOTE(review): the database credentials are hard-coded in source;
    # consider loading them from configuration or the environment.
    connection = pymysql.connect(
        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
        port=3306,
        user="crawler",
        passwd="crawler123456@",
        db="piaoquan-crawler",
        charset="utf8mb4"
    )
    sql = "select trace_id, content_id, recall_video_id1, recall_video_id2, recall_video_id3 from long_articles_video where content_status = 2;"
    try:
        # The cursor context manager closes the cursor on exit.
        with connection.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    finally:
        # Always release the connection, even when the query fails.
        connection.close()
def mig(single_tuple):
    """
    Migrate one source row into the ``article_match_videos`` table.

    One record is inserted for every non-empty recall video id in the row.
    ``request_time`` is the last "-"-separated segment of the trace id, and
    ``video_status`` is 0 when the content id contains "lehuo", otherwise 1.

    :param single_tuple: a row ordered as
        (trace_id, content_id, vid1, vid2, vid3)
    :return: None
    """
    trace_id = single_tuple[0]
    content_id = single_tuple[1]
    vid1 = single_tuple[2]
    vid2 = single_tuple[3]
    vid3 = single_tuple[4]
    request_time = trace_id.split("-")[-1]
    video_status = 0 if "lehuo" in content_id else 1

    # Skip empty / NULL video ids; nothing to do if none remain.
    video_ids = [video_id for video_id in (vid1, vid2, vid3) if video_id]
    if not video_ids:
        return

    insert_sql = """
        INSERT INTO article_match_videos
        (video_id, trace_id, content_id, request_time, video_status)
        values
        (%s, %s, %s, %s, %s);
    """
    # NOTE(review): the database credentials are hard-coded in source;
    # consider loading them from configuration or the environment.
    # One connection per call (instead of one per video id), closed in
    # ``finally`` so worker threads do not leak connections.
    connection = pymysql.connect(
        host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
        port=3306,
        user="crawler",
        passwd="crawler123456@",
        db="piaoquan-crawler",
        charset="utf8mb4"
    )
    try:
        with connection.cursor() as cursor:
            for video_id in video_ids:
                cursor.execute(
                    insert_sql,
                    (video_id, trace_id, content_id, request_time, video_status)
                )
                # Commit after each insert so earlier rows persist even if
                # a later insert fails.
                connection.commit()
    finally:
        connection.close()
# Load every source row, then migrate the rows concurrently.
tuple_list = get_data_list()
print(len(tuple_list))
with ThreadPoolExecutor(max_workers=20) as Pool:
    # Submit explicitly and inspect every future: an unconsumed
    # ``Pool.map`` iterator silently discards exceptions raised in workers.
    futures = [Pool.submit(mig, row) for row in tuple_list]
    for row, future in zip(tuple_list, futures):
        error = future.exception()  # blocks until this task has finished
        if error is not None:
            # Report the failure but keep processing the remaining rows.
            print(f"mig failed for trace_id={row[0]!r}: {error!r}")
|