1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- """
- @author: luojunhui
- """
import csv
import json
from concurrent.futures.thread import ThreadPoolExecutor

import pandas as pd
import pymysql
import requests
def request_for_info(video_id, timeout=10):
    """
    Fetch video metadata from the long-video batch-select API.

    :param video_id: id of the video to look up (sent as a one-element list)
    :param timeout: request timeout in seconds (default 10) so a hung server
        cannot block a worker thread forever — the original call had none
    :return: decoded JSON response body (dict)
    :raises requests.HTTPError: if the API answers with a 4xx/5xx status
    :raises requests.Timeout: if the server does not respond in time
    """
    url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
    data = {
        "videoIdList": [video_id]
    }
    header = {
        "Content-Type": "application/json",
    }
    response = requests.post(url, headers=header, data=json.dumps(data), timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()
def migrate_data_to_mysql(video_id, title, view_, return_, video_url, dt="20240715"):
    """
    Insert one video's stats row into the top_return_daily table.

    :param video_id: video identifier
    :param title: video title
    :param view_: view count (coercible to int)
    :param return_: return count (coercible to int)
    :param video_url: playable URL resolved from the API
    :param dt: partition date string; defaults to "20240715", the value the
        original script hard-coded — parameterized so the script is reusable
    :return: None
    """
    view_count = int(view_)
    # rov = return-over-view; guard against division by zero for 0-view rows.
    rov = int(return_) / view_count if view_count > 0 else 0
    insert_sql = """
        INSERT INTO top_return_daily
        (video_id, title, view_, return_, video_url, dt, rov)
        VALUES
        (%s, %s, %s, %s, %s, %s, %s);
    """
    connection = pymysql.connect(
        host='rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com',
        port=3306,
        user='crawler',
        password='crawler123456@',
        db='piaoquan-crawler',
        charset='utf8mb4'
    )
    # The original leaked one connection per call (10 threads hammering the
    # server); close it deterministically even if the INSERT raises.
    try:
        with connection.cursor() as cursor:
            cursor.execute(
                insert_sql,
                (
                    video_id,
                    title,
                    view_,
                    return_,
                    video_url,
                    dt,
                    rov
                )
            )
        connection.commit()
    finally:
        connection.close()
def process(line):
    """
    Handle one parsed CSV row: resolve the playable URL for the video via
    the API, then persist the row into MySQL.

    Expected row layout: [title, quoted video id, _, view count, return count].
    """
    title = line[0]
    # The id column arrives wrapped in double quotes; strip them.
    video_id = line[1].replace('"', '')
    view_count = int(line[3])
    return_count = int(line[4])
    info = request_for_info(video_id)
    video_url = info['data'][0]['videoPath']
    migrate_data_to_mysql(video_id, title, view_count, return_count, video_url)
path = "/Users/luojunhui/Downloads/2022-top10000.csv"

# Parse with the stdlib csv module instead of a bare split(",") so that
# quoted fields containing commas are not torn apart (the id column is
# quoted, so quoted fields clearly occur in this export).
rows = []
with open(path, encoding="gbk", errors='ignore', newline="") as f:
    for row in csv.reader(f):
        # Keep only well-formed 5-column rows, matching the original filter.
        if len(row) == 5:
            rows.append(row)

with ThreadPoolExecutor(max_workers=10) as pool:
    pool.map(process, rows)
|