migrate.py

  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import pymysql
  6. import requests
  7. import pandas as pd
  8. from concurrent.futures.thread import ThreadPoolExecutor
def request_for_info(video_id):
    """
    Request video info for the given id from the batch-select endpoint
    :param video_id:
    :return: parsed JSON response
    """
    url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
    data = {
        "videoIdList": [video_id]
    }
    header = {
        "Content-Type": "application/json",
    }
    response = requests.post(url, headers=header, data=json.dumps(data))
    return response.json()

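# NOTE: the response shape is an assumption inferred from how process()
# indexes the result below; the endpoint is expected to return something
# like {"data": [{"videoPath": "...", ...}]}, one entry per requested id.
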
def migrate_data_to_mysql(video_id, title, view_, return_, video_url):
    """
    Migrate one data_works row into the database
    :param video_id:
    :param title:
    :param view_:
    :param return_:
    :param video_url:
    :return:
    """
    # return-over-view ratio; guard against division by zero
    rov = int(return_) / int(view_) if int(view_) > 0 else 0
    insert_sql = """
        INSERT INTO top_return_daily
        (video_id, title, view_, return_, video_url, dt, rov)
        VALUES
        (%s, %s, %s, %s, %s, %s, %s);
    """
    connection = pymysql.connect(
        host='rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com',
        port=3306,
        user='crawler',
        password='crawler123456@',
        db='piaoquan-crawler',
        charset='utf8mb4'
    )
    cursor = connection.cursor()
    cursor.execute(
        insert_sql,
        (
            video_id,
            title,
            view_,
            return_,
            video_url,
            "20240710",  # hardcoded partition date (dt)
            rov
        )
    )
    connection.commit()
    connection.close()  # release the per-row connection

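# Hypothetical DDL for the target table, reconstructed from the INSERT
# statement above; the column types are assumptions, not the live schema:
#
# CREATE TABLE top_return_daily (
#     video_id  BIGINT,
#     title     VARCHAR(255),
#     view_     BIGINT,
#     return_   BIGINT,
#     video_url VARCHAR(1024),
#     dt        VARCHAR(8),
#     rov       DOUBLE
# );
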
def process(line):
    """
    Process one spreadsheet row: resolve the video URL, then insert the record
    """
    title = line[0]
    video_id = line[1]
    view = line[3]
    return_count = line[4]
    video_url = request_for_info(video_id)['data'][0]['videoPath']
    migrate_data_to_mysql(video_id, title, view, return_count, video_url)

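# Assumed layout of top_return_data.xlsx, inferred from the indices used
# in process() (column index 2 is unused):
#   col 0: title | col 1: video_id | col 3: view count | col 4: return count
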
if __name__ == "__main__":
    df = pd.read_excel("/Users/luojunhui/Downloads/top_return_data.xlsx")
    data_list = df.values.tolist()
    # migrate rows concurrently with up to 10 worker threads
    with ThreadPoolExecutor(max_workers=10) as pool:
        pool.map(process, data_list)
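
# Design note: each row opens and closes its own MySQL connection, which
# keeps the worker threads independent at the cost of connection churn;
# a shared connection pool (e.g. DBUtils' PooledDB wrapping pymysql)
# would be a reasonable alternative for larger batches.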