"""
@author: luojunhui
"""
import json
import pymysql
import requests
import pandas as pd
from concurrent.futures.thread import ThreadPoolExecutor
def request_for_info(video_id):
    """
    Fetch video info for a single video id from the longvideo open API.

    :param video_id: id of the video to look up
    :return: decoded JSON response body (dict)
    :raises requests.HTTPError: when the API answers with an error status
    """
    url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
    payload = {
        "videoIdList": [video_id]
    }
    headers = {
        "Content-Type": "application/json",
    }
    # requests has NO default timeout; without one a stalled call would
    # block its ThreadPoolExecutor worker forever.
    response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=30)
    # Fail fast on HTTP errors instead of trying to json-decode an error page.
    response.raise_for_status()
    return response.json()
def migrate_data_to_mysql(video_id, title, view_, return_, video_url, dt="20240710"):
    """
    Insert one top-return record into the top_return_daily table.

    :param video_id: video id
    :param title: video title
    :param view_: view count (numeric or numeric string)
    :param return_: return count (numeric or numeric string)
    :param video_url: playable video url
    :param dt: partition date string (yyyymmdd); defaults to the value the
        original script hard-coded, so existing callers are unaffected
    """
    # rov = return/view ratio; guard against division by zero.
    rov = int(return_) / int(view_) if int(view_) > 0 else 0
    # Plain string (the original used an f-string with nothing to interpolate);
    # values are bound via %s placeholders, never string-formatted into SQL.
    insert_sql = """
        INSERT INTO top_return_daily
        (video_id, title, view_, return_, video_url, dt, rov)
        VALUES
        (%s, %s, %s, %s, %s, %s, %s);
    """
    # NOTE(review): credentials are hard-coded; move them to env/config
    # before this leaves one-off-script territory.
    connection = pymysql.connect(
        host='rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com',
        port=3306,
        user='crawler',
        password='crawler123456@',
        db='piaoquan-crawler',
        charset='utf8mb4'
    )
    try:
        with connection.cursor() as cursor:
            cursor.execute(
                insert_sql,
                (
                    video_id,
                    title,
                    view_,
                    return_,
                    video_url,
                    dt,
                    rov
                )
            )
        connection.commit()
    finally:
        # The original leaked one connection per inserted row; with 10 worker
        # threads that exhausts the server's connection limit quickly.
        connection.close()
def process(line):
    """
    Handle one spreadsheet row: resolve the playable url for the row's
    video id, then persist the row to MySQL.

    :param line: row as a list — [title, video_id, _, view, return_count, ...]
    """
    title, video_id = line[0], line[1]
    view, return_count = line[3], line[4]
    info = request_for_info(video_id)
    video_url = info['data'][0]['videoPath']
    migrate_data_to_mysql(video_id, title, view, return_count, video_url)
if __name__ == "__main__":
    # Guarded entry point: the original ran the Excel read, network calls and
    # DB writes at import time, which breaks any attempt to import this module.
    df = pd.read_excel("/Users/luojunhui/Downloads/top_return_data.xlsx")
    data_list = df.values.tolist()
    # Fan the per-row work (HTTP lookup + insert) out to 10 worker threads;
    # the work is I/O-bound, so threads overlap the waits.
    with ThreadPoolExecutor(max_workers=10) as pool:
        pool.map(process, data_list)
|