"""Scrape experiment reports from the WeChat data console and export them.

The script drives a visible Chromium window through Playwright, lets the
project helpers (`check_account`, `get_info`) trigger report downloads for
each mini program, parses every downloaded workbook, stores one record per
report through `Mysql().insert_line`, and finally writes all records to an
Excel file.
"""
import os
import json
import asyncio

import pandas as pd
from pandas import DataFrame
from playwright.async_api import async_playwright

from functions import check_account, get_info, Mysql

# Directory Playwright saves downloads into; the working copy lives here too.
DOWNLOAD_DIR = "temp_file"

# Header of the exported sheet: mini program, experiment, metric, date,
# control group, followed by 24 experiment-group columns (29 in total).
COLUMNS = ["小程序", "实验", "指标", "日期", "对照组"] + [
    "实验组{}".format(i) for i in range(1, 25)
]

# Mini program currently being scraped; written by the scraping coroutine and
# read by the download handler to label incoming reports.
platform_name = None
# Accumulated output records, one list per downloaded report.
data_frame = []
# Serialises download handling because every download is moved to the same
# working file name.
# NOTE(review): on Python < 3.10 an asyncio.Lock created at import time binds
# to the default event loop rather than the one asyncio.run() creates, which
# can raise under contention - confirm the interpreter version in use.
lock = asyncio.Lock()


def _build_record(platform, rows):
    """Flatten the parsed report rows into one output record.

    Args:
        platform: Name of the mini program the report belongs to.
        rows: `DataFrame.values.tolist()` of the downloaded workbook. Because
            `pd.read_excel` consumes the first sheet row as the header, index
            0 here is the second row of the sheet.

    Returns:
        A `(record, labelled)` tuple: `record` is the flat list padded with
        the string "NULL" up to `len(COLUMNS)` entries, and `labelled` maps
        each title-row cell to the matching cell of the last data row.
    """
    task_name = rows[0][1]   # experiment name
    task_index = rows[5][1]  # data metric
    title_row = rows[8]
    data_line = rows[-1]

    record = [platform, task_name, task_index, *data_line]
    # A negative multiplier yields an empty list, so longer rows are left as-is.
    record.extend(["NULL"] * (len(COLUMNS) - len(record)))
    labelled = dict(zip(title_row, data_line))
    return record, labelled


async def handle_download(download):
    """Parse one downloaded report, store it, and queue it for export.

    Registered as the page's "download" event handler.

    Args:
        download: The Playwright download object passed to the handler.
    """
    # Capture the label before any await: while this handler waits for the
    # lock or for the download to finish, the scraping coroutine may already
    # have moved on to the next mini program.
    platform = platform_name

    async with lock:
        download_path = await download.path()
        work_path = os.path.join(DOWNLOAD_DIR, "temp.xlsx")
        # os.replace overwrites a stale working copy left by an aborted run,
        # where os.rename would raise on Windows.
        os.replace(download_path, work_path)
        try:
            rows = pd.read_excel(work_path).values.tolist()
            record, labelled = _build_record(platform, rows)
            print(platform, "\t", record[1], "\t", record[2])
            print(json.dumps(labelled, ensure_ascii=False, indent=4))
            Mysql().insert_line(data=record)
            data_frame.append(record)
        finally:
            # Always drop the working copy so the next download starts clean.
            os.remove(work_path)


async def _collect_reports(page):
    """Log in and trigger the report downloads for each enabled mini program."""
    global platform_name

    # Log in; this requires scanning a QR code.
    await page.goto("https://wedata.weixin.qq.com/mp2/login")
    await page.goto("https://wedata.weixin.qq.com/mp2/?source=0")
    await page.goto("https://wedata.weixin.qq.com/mp2/basic-data/core-data?source=0")

    # By default 票圈内容精选 is entered first, so no account switch is needed.
    platform_name = '票圈内容精选'
    await get_info(page, platform_name)

    # The blocks below are the currently disabled mini programs, kept as they
    # were so they can be re-enabled by uncommenting.

    # # 票圈 | 3亿人喜欢的视频平台
    # platform_name = "票圈 l 3亿人喜欢的视频平台"
    # await check_account(page, platform_name)
    # await asyncio.sleep(3)
    # await get_info(page, platform_name)

    # 票圈 | 视频精选
    # platform_name = "票圈 l 视频精选"
    # await check_account(page, platform_name)
    # await get_info(page, platform_name)

    # 票圈 l 祝福
    # platform_name = "票圈 l 祝福"
    # await check_account(page, platform_name)
    # await get_info(page, platform_name)

    # 票圈 l 福年 1
    # platform_name = "票圈 I 福年"
    # await check_account(page, platform_name)
    # await get_info(page, platform_name)

    # 票圈 l 信仰之路 1
    # platform_name = "票圈 l 信仰之路"
    # await check_account(page, platform_name)
    # await get_info(page, platform_name)

    # 票圈视频
    # platform_name = "票圈视频"
    # await check_account(page, platform_name)
    # await get_info(page, platform_name)

    # 票圈短视频 1
    # platform_name = "票圈短视频"
    # await check_account(page, platform_name)
    # await get_info(page, platform_name)

    # 老好看视频
    # platform_name = "老好看视频"
    # await check_account(page, platform_name)
    # await get_info(page, platform_name)

    # 票圈最惊奇
    # platform_name = "票圈最惊奇"
    # await check_account(page, platform_name)
    # await get_info(page, platform_name)

    # 票圈视频+
    platform_name = "票圈视频+"
    await check_account(page, platform_name)
    await get_info(page, platform_name)


async def run(playwright):
    """Scrape the enabled mini programs and export the collected records.

    Mini programs (the "ok" marks are carried over from the original notes;
    presumably they flag entries already verified - TODO confirm):
      1  票圈内容精选               ok
      2  票圈 l 3亿人喜欢的视频平台  ok
      3  票圈 l 视频精选
      4  票圈 l 祝福
      5  票圈 l 福年
      6  票圈 l 信仰之路             ok
      7  票圈视频                   ok
      8  票圈短视频
      9  老好看视频                 ok
      10 票圈最惊奇                 ok
      11 票圈视频+

    Args:
        playwright: The object yielded by `async_playwright()`.
    """
    # Initialise the browser (headed, because login needs a QR-code scan).
    browser = await playwright.chromium.launch(
        headless=False,
        downloads_path=DOWNLOAD_DIR,
    )
    try:
        context = await browser.new_context(accept_downloads=True)
        try:
            page = await context.new_page()
            await page.set_viewport_size({"width": 1680, "height": 1080})
            page.on("download", handle_download)

            await _collect_reports(page)

            print(len(COLUMNS))
            out_df = DataFrame(data_frame, columns=COLUMNS)
            out_df.to_excel("{}.xlsx".format("2023-12-29-result"), index=False)
            # await asyncio.sleep(100)
        finally:
            # Close the browser context even when scraping fails part-way.
            await context.close()
    finally:
        await browser.close()


async def main():
    """Start Playwright and run the scraping session."""
    async with async_playwright() as playwright:
        await run(playwright)


if __name__ == "__main__":
    # Guarded so importing this module does not launch a browser session.
    asyncio.run(main())