"""Collect experiment reports from the WeChat "wedata" console.

The script drives a visible Chromium window through a fixed list of mini
program accounts.  Every ``.xlsx`` report the console downloads is parsed into
a single flat row, stored through ``Mysql().insert_line`` and also kept in the
module-level ``data_frame`` list.
"""

import asyncio
import datetime
import json
import os

import pandas as pd
from playwright.async_api import async_playwright

from applications import Mysql, check_account, get_info

# Directory Playwright downloads into; the scratch workbook lives there too.
DOWNLOAD_DIR = "temp_file"
# Built with os.path.join so the path also resolves inside DOWNLOAD_DIR on
# POSIX systems (a literal "temp_file\temp.xlsx" only works on Windows).
TEMP_REPORT_PATH = os.path.join(DOWNLOAD_DIR, "temp.xlsx")

# Rows are padded to this many fields: mini program, experiment, metric, date,
# control group and up to 24 experiment groups.
ROW_WIDTH = 29
ROW_PADDING = "NULL"

# Positions inside the parsed sheet (row 0 is the first row *after* the header
# row that pandas consumes).
EXPERIMENT_NAME_ROW = 0
METRIC_NAME_ROW = 5
TITLE_ROW = 8

# The account that is already selected when the console opens.
DEFAULT_PLATFORM = "票圈内容精选"
# Accounts that have to be switched to, in visiting order.
OTHER_PLATFORMS = (
    "票圈 l 3亿人喜欢的视频平台",
    "票圈 l 视频精选",
    "票圈 l 祝福",
    # NOTE(review): this name uses a capital "I" where the others use a
    # lowercase "l"; kept exactly as in the original - confirm it matches the
    # real account name.
    "票圈 I 福年",
    "票圈 l 信仰之路",
    "票圈视频",
    "票圈短视频",
    "老好看视频",
    "票圈最惊奇",
    "票圈视频+",
)
# Extra pause (seconds) between switching to an account and reading its data.
SETTLE_SECONDS = {"票圈 l 3亿人喜欢的视频平台": 3}

LOGIN_URL = "https://wedata.weixin.qq.com/mp2/login"
HOME_URL = "https://wedata.weixin.qq.com/mp2/?source=0"
CORE_DATA_URL = "https://wedata.weixin.qq.com/mp2/basic-data/core-data?source=0"

# Name of the account currently being scraped; read by ``handle_download``.
platform_name = None
# Every row produced during this run, in download order (kept so the run can
# be exported as a whole, e.g. to a spreadsheet).
data_frame = []
# Serialises download handling because all downloads share TEMP_REPORT_PATH.
lock = asyncio.Lock()


def _build_row(platform, rows):
    """Flatten one parsed report into a padded row and echo it to stdout.

    Args:
        platform: Name of the mini program the report belongs to.
        rows: The sheet as a list of row lists (``DataFrame.values.tolist()``).

    Returns:
        ``[platform, experiment name, metric name, *last data row]`` padded
        with ``"NULL"`` up to ``ROW_WIDTH`` fields.

    Raises:
        IndexError: If the sheet has fewer rows/columns than the fixed layout
            this parser expects.
    """
    experiment_name = rows[EXPERIMENT_NAME_ROW][1]
    metric_name = rows[METRIC_NAME_ROW][1]
    titles = rows[TITLE_ROW]
    latest = rows[-1]  # only the last data line of the report is stored

    row = [platform, experiment_name, metric_name, *latest]

    print(platform, "\t", experiment_name, "\t", metric_name)
    # default=str keeps this purely diagnostic dump from aborting the insert
    # when a cell holds a value json cannot encode (e.g. a timestamp).
    labelled = dict(zip(titles, latest))
    print(json.dumps(labelled, ensure_ascii=False, indent=4, default=str))

    row.extend([ROW_PADDING] * (ROW_WIDTH - len(row)))
    return row


async def handle_download(download):
    """Parse a finished report download and persist its latest data row.

    Registered as the page's ``"download"`` event handler.  The file is moved
    to ``TEMP_REPORT_PATH``, read with pandas, stored through
    ``Mysql().insert_line`` and appended to ``data_frame``.  The scratch file
    is removed afterwards, even when parsing or storing fails.

    Args:
        download: The Playwright download object passed to the event handler.
    """
    # Snapshot the account name before the first await: ``run`` may already
    # have moved on to another account while this handler waits for the lock.
    platform = platform_name

    async with lock:
        downloaded_path = await download.path()
        # os.replace overwrites a stale scratch file in one step, on every OS.
        os.replace(downloaded_path, TEMP_REPORT_PATH)
        try:
            rows = pd.read_excel(TEMP_REPORT_PATH).values.tolist()
            row = _build_row(platform, rows)
            Mysql().insert_line(data=row)
            data_frame.append(row)
        finally:
            os.remove(TEMP_REPORT_PATH)


async def run(playwright):
    """Open the console and collect the reports of every configured account.

    Visits ``DEFAULT_PLATFORM`` first (no account switch needed), then each
    entry of ``OTHER_PLATFORMS`` in order.  Downloads triggered along the way
    are processed by ``handle_download``.

    Args:
        playwright: The object yielded by ``async_playwright()``.
    """
    global platform_name

    browser = await playwright.chromium.launch(
        headless=False,
        downloads_path=DOWNLOAD_DIR,
    )
    try:
        context = await browser.new_context(accept_downloads=True)
        page = await context.new_page()
        await page.set_viewport_size({"width": 1680, "height": 1080})
        page.on("download", handle_download)

        # Log in (requires scanning a QR code), then open the core-data page.
        await page.goto(LOGIN_URL)
        await page.goto(HOME_URL)
        await page.goto(CORE_DATA_URL)

        platform_name = DEFAULT_PLATFORM
        await get_info(page, platform_name)

        for name in OTHER_PLATFORMS:
            platform_name = name
            await check_account(page, name)
            settle = SETTLE_SECONDS.get(name)
            if settle:
                await asyncio.sleep(settle)
            await get_info(page, name)

        await context.close()
    finally:
        # Always release the browser, including when a step above raised.
        await browser.close()


async def main():
    """Start Playwright and run one full collection pass."""
    async with async_playwright() as playwright:
        await run(playwright)


if __name__ == "__main__":
    asyncio.run(main())