|
@@ -0,0 +1,156 @@
|
|
|
|
+import os
|
|
|
|
+import json
|
|
|
|
+import asyncio
|
|
|
|
+import pandas as pd
|
|
|
|
+from pandas import DataFrame
|
|
|
|
+from playwright.async_api import async_playwright
|
|
|
|
+
|
|
|
|
+from functions import check_account, get_info, Mysql
|
|
|
|
+
|
|
|
|
# Name of the mini program currently being scraped; assigned in run() and read
# by handle_download() to tag each record.
platform_name = None
# Records appended by handle_download(); run() exports them to Excel.
data_frame = []
# Acquired by handle_download() while it processes a download.
# NOTE(review): created at import time, before asyncio.run() starts a loop; on
# Python < 3.10 an asyncio.Lock binds to an event loop at construction --
# confirm the target Python version.
lock = asyncio.Lock()
|
|
|
|
+
|
|
|
|
+
|
|
|
|
async def handle_download(download):
    """Parse one downloaded experiment report and record its last data row.

    Intended as a handler for Playwright ``download`` events. The downloaded
    workbook is moved to a fixed temporary path, read with pandas and turned
    into a flat record ``[platform, experiment name, metric, *last row]``,
    padded with ``"NULL"`` up to 29 fields. The record is inserted through
    ``Mysql().insert_line`` and appended to the module-level ``data_frame``.
    The temporary file is removed even when parsing or the insert fails.

    Args:
        download: Object passed with the ``download`` event; only its
            awaitable ``path()`` method is used.

    Raises:
        IndexError: If the sheet has fewer rows/columns than the fixed
            positions read below.
    """
    record_width = 29

    # Handlers share one temp file path and the module-level platform_name,
    # so the whole body runs under the lock.
    async with lock:
        record = [platform_name]

        download_path = await download.path()
        # Built with os.path.join so the path is valid on every OS
        # (on Windows it is still "temp_file\temp.xlsx").
        new_file_path = os.path.join("temp_file", "temp.xlsx")
        # os.replace also succeeds when a stale temp.xlsx is still present,
        # where os.rename would fail on Windows.
        os.replace(download_path, new_file_path)
        try:
            # Indices refer to the rows pandas returns, i.e. after the first
            # sheet row has been consumed as the header (read_excel default).
            rows = pd.read_excel(new_file_path).values.tolist()
            # Experiment name
            task_name = rows[0][1]
            # Data metric
            task_index = rows[5][1]
            title_row = rows[8]
            data_line = rows[-1]

            record += [task_name, task_index]
            record += data_line

            # Column title -> value of the last row, printed for inspection.
            titled_values = dict(zip(title_row, data_line))
            print(platform_name, "\t", task_name, "\t", task_index)
            print(json.dumps(titled_values, ensure_ascii=False, indent=4))

            # Pad short rows to the fixed record width.
            record.extend(["NULL"] * (record_width - len(record)))

            Mysql().insert_line(data=record)
            data_frame.append(record)
        finally:
            os.remove(new_file_path)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
async def run(playwright):
    """Scrape experiment data from wedata.weixin.qq.com and export it to Excel.

    Opens a visible Chromium window (login requires scanning a QR code),
    registers ``handle_download`` for the page's ``download`` events, visits
    the mini programs enabled below and finally writes the records collected
    in the module-level ``data_frame`` to ``2023-12-29-result.xlsx``. The
    browser context and the browser are closed even if scraping fails.

    Args:
        playwright: Object yielded by ``async_playwright()``.
    """
    global platform_name

    # This mini program is entered by default, so no account switch is made.
    default_platform = '票圈内容精选'

    # Mini programs reached by switching account; uncomment an entry to
    # include it. "ok" repeats the marker from the author's original notes.
    switched_platforms = (
        # "票圈 l 3亿人喜欢的视频平台",  # ok; the disabled code slept 3 s after switching
        # "票圈 l 视频精选",
        # "票圈 l 祝福",
        # "票圈 I 福年",  # spelled with a capital "I" in the disabled code
        # "票圈 l 信仰之路",  # ok
        # "票圈视频",  # ok
        # "票圈短视频",
        # "老好看视频",  # ok
        # "票圈最惊奇",  # ok
        "票圈视频+",
    )

    # Initialise the browser
    browser = await playwright.chromium.launch(
        headless=False,
        downloads_path="temp_file",
    )
    try:
        context = await browser.new_context(accept_downloads=True)
        try:
            page = await context.new_page()
            await page.set_viewport_size({"width": 1680, "height": 1080})

            page.on("download", handle_download)

            # Log in; the QR code has to be scanned
            await page.goto("https://wedata.weixin.qq.com/mp2/login")
            await page.goto("https://wedata.weixin.qq.com/mp2/?source=0")
            await page.goto(
                "https://wedata.weixin.qq.com/mp2/basic-data/core-data?source=0"
            )

            # handle_download reads platform_name, so it is set before each
            # get_info call.
            platform_name = default_platform
            await get_info(page, platform_name)

            for name in switched_platforms:
                platform_name = name
                await check_account(page, platform_name)
                await get_info(page, platform_name)

            # 5 fixed columns + 24 experiment groups = 29, the width
            # handle_download pads each record to.
            columns = ["小程序", "实验", "指标", "日期", "对照组"] + [
                "实验组{}".format(i) for i in range(1, 25)
            ]
            print(len(columns))
            out_df = DataFrame(data_frame, columns=columns)
            out_df.to_excel("2023-12-29-result.xlsx", index=False)
        finally:
            # Close the browser context, then the browser
            await context.close()
    finally:
        await browser.close()
|
|
|
|
+
|
|
|
|
+
|
|
|
|
async def main():
    """Run the scraping session inside an ``async_playwright()`` context."""
    async with async_playwright() as pw:
        await run(pw)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+asyncio.run(main())
|