""" @author: luojunhui """ import json import pymysql import requests import pandas as pd from concurrent.futures.thread import ThreadPoolExecutor def request_for_info(video_id): """ 请求数据 :param video_id: :return: """ url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo" data = { "videoIdList": [video_id] } header = { "Content-Type": "application/json", } response = requests.post(url, headers=header, data=json.dumps(data)) return response.json() def migrate_data_to_mysql(video_id, title, view_, return_, video_url): """ 把 data_works 数据迁移到数据库 :param obj: :return: """ rov = int(return_) / int(view_) if int(view_) > 0 else 0 insert_sql = f""" INSERT INTO top_return_daily (video_id, title, view_, return_, video_url, dt, rov) VALUES (%s, %s, %s, %s, %s, %s, %s); """ connection = pymysql.connect( host='rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com', port=3306, user='crawler', password='crawler123456@', db='piaoquan-crawler', charset='utf8mb4' ) cursor = connection.cursor() cursor.execute( insert_sql, ( video_id, title, view_, return_, video_url, "20240710", rov ) ) connection.commit() def process(line): title = line[0] video_id = line[1] view = line[3] return_count = line[4] video_url = request_for_info(video_id)['data'][0]['videoPath'] migrate_data_to_mysql(video_id, title, view, return_count, video_url) df = pd.read_excel("/Users/luojunhui/Downloads/top_return_data.xlsx") data_list = df.values.tolist() with ThreadPoolExecutor(max_workers=10) as pool: pool.map(process, data_list)