"""
@author: luojunhui

One-off migration script: read a CSV export of videos, look up each video's
``videoPath`` through the video-info HTTP API, and insert one row per video
into the ``top_return_daily`` MySQL table.
"""
import json
import logging
from concurrent.futures import as_completed
from concurrent.futures.thread import ThreadPoolExecutor

import pandas as pd  # not used by the code in this file; kept from the original imports
import pymysql
import requests

logger = logging.getLogger(__name__)

VIDEO_INFO_URL = (
    "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
)

# Seconds to wait for the video-info API before giving up, so that a stalled
# server cannot block a worker thread forever.
REQUEST_TIMEOUT = 30

# Value written to the ``dt`` column when the caller does not supply one.
DEFAULT_DT = "20240715"

# NOTE(review): credentials are hard-coded in source; they should be moved to
# configuration or the environment and rotated.
DB_CONFIG = {
    "host": 'rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com',
    "port": 3306,
    "user": 'crawler',
    "password": 'crawler123456@',
    "db": 'piaoquan-crawler',
    "charset": 'utf8mb4',
}

INSERT_SQL = """
    INSERT INTO top_return_daily
        (video_id, title, view_, return_, video_url, dt, rov)
    VALUES
        (%s, %s, %s, %s, %s, %s, %s);
"""

path = "/Users/luojunhui/Downloads/2022-top10000.csv"


def request_for_info(video_id):
    """
    Request video information for a single video from the video-info API.

    :param video_id: id of the video to look up; sent as the only element of
        ``videoIdList``
    :return: the decoded JSON body of the response
    :raises requests.RequestException: on network failure or timeout
    :raises ValueError: if the response body is not valid JSON
    """
    payload = {
        "videoIdList": [video_id]
    }
    headers = {
        "Content-Type": "application/json",
    }
    response = requests.post(
        VIDEO_INFO_URL,
        headers=headers,
        data=json.dumps(payload),
        timeout=REQUEST_TIMEOUT,
    )
    return response.json()


def migrate_data_to_mysql(video_id, title, view_, return_, video_url, dt=DEFAULT_DT):
    """
    Insert one video record into the ``top_return_daily`` table.

    ``rov`` is stored as ``return_ / view_``, or 0 when ``view_`` is not
    positive.

    :param video_id: video id
    :param title: video title
    :param view_: view count (anything ``int()`` accepts)
    :param return_: return count (anything ``int()`` accepts)
    :param video_url: video URL to store
    :param dt: value for the ``dt`` column; defaults to ``"20240715"``
    :return: None
    """
    views = int(view_)
    rov = int(return_) / views if views > 0 else 0
    connection = pymysql.connect(**DB_CONFIG)
    try:
        # The cursor context manager closes the cursor on exit.
        with connection.cursor() as cursor:
            cursor.execute(
                INSERT_SQL,
                (video_id, title, view_, return_, video_url, dt, rov),
            )
        connection.commit()
    finally:
        # Always release the connection; one is opened per call.
        connection.close()


def process(line):
    """
    Migrate one parsed CSV row.

    Reads the title from field 0, the video id from field 1 (double quotes
    stripped), the view count from field 3 and the return count from field 4;
    field 2 is not used. The video URL is taken from ``data[0].videoPath`` of
    the API response.

    :param line: list of string fields from one CSV row
    :return: None
    """
    title = line[0]
    video_id = line[1].replace('"', '')
    view = int(line[3])
    return_count = int(line[4])
    video_url = request_for_info(video_id)['data'][0]['videoPath']
    migrate_data_to_mysql(video_id, title, view, return_count, video_url)


def _load_rows(csv_path):
    """Return the rows of ``csv_path`` that split on ',' into exactly 5 fields."""
    rows = []
    with open(csv_path, encoding="gbk", errors='ignore') as f:
        for raw_line in f:
            fields = raw_line.rstrip("\n").split(",")
            # Rows with any other field count are skipped.
            if len(fields) == 5:
                rows.append(fields)
    return rows


def main(csv_path=path):
    """
    Load the CSV at ``csv_path`` and migrate every row using 10 worker threads.

    A failure in one row is logged and does not stop the remaining rows.
    """
    rows = _load_rows(csv_path)
    with ThreadPoolExecutor(max_workers=10) as pool:
        futures = {pool.submit(process, row): row for row in rows}
        for future in as_completed(futures):
            error = future.exception()
            if error is not None:
                logger.error("failed to migrate row %r: %r", futures[future], error)


if __name__ == "__main__":
    main()