123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- import os
- import json
- import asyncio
- import datetime
- import pandas as pd
- from playwright.async_api import async_playwright
- from applications import get_info, Mysql
# Display name of the mini-program being scraped; assigned in run() and read
# by handle_download() when it builds each result row.
platform_name = None
# Serializes download handlers, which all reuse the same temp file path.
lock = asyncio.Lock()
# Collects every row built by handle_download(). It has to exist at module
# level: the handler appends to it and would raise NameError otherwise.
data_frame = []
async def handle_download(download):
    """Parse one downloaded Excel report, persist its last row, and clean up.

    The downloaded file is moved to a fixed working path, read with pandas,
    and turned into a flat row: platform name, task name, task metric, then
    the values of the sheet's last line. The row is padded with "NULL" to 29
    entries, inserted through ``Mysql().insert_line`` and appended to the
    module-level ``data_frame`` list. The working file is deleted even when
    parsing or storing fails.

    Args:
        download: Object delivered with the page's ``download`` event; only
            its awaitable ``path()`` method is used.

    Raises:
        IndexError: If the sheet has fewer rows or columns than the fixed
            positions read below.
    """
    # All handlers share one working file, so they must run one at a time.
    async with lock:
        platform = platform_name
        download_path = await download.path()

        # Joined per-OS so the file lands inside temp_file on every platform.
        new_file_path = os.path.join("temp_file", "temp.xlsx")
        # os.replace overwrites a stale target, so no exists/remove dance is
        # needed and the source path is gone afterwards.
        os.replace(download_path, new_file_path)

        try:
            rows = pd.read_excel(new_file_path).values.tolist()

            # Experiment (task) name.
            task_name = rows[0][1]
            # Data metric.
            task_index = rows[5][1]
            title_row = rows[8]
            data_line = rows[-1]

            record = [platform, task_name, task_index, *data_line]

            # Rows of one sheet all have the same width, so zip pairs every
            # title with its value.
            labelled = dict(zip(title_row, data_line))
            print(platform, "\t", task_name, "\t", task_index)
            print(json.dumps(labelled, ensure_ascii=False, indent=4))

            # Pad to the fixed 29-entry row width passed to insert_line.
            record.extend(["NULL"] * (29 - len(record)))

            Mysql().insert_line(data=record)
            # Relies on a module-level list named data_frame existing; a
            # NameError is raised here otherwise.
            data_frame.append(record)
        finally:
            # Never leave the working file behind, even on failure.
            if os.path.exists(new_file_path):
                os.remove(new_file_path)
async def run(playwright):
    """Ask which mini-program to scrape, then drive the browser session.

    Prompts on stdin for a numeric id, stores the matching display name in
    the module-level ``platform_name``, launches a visible Chromium window
    with downloads saved under ``temp_file``, walks through the WeData login
    pages and hands the page to ``get_info``. The context and browser are
    closed even if scraping fails.

    Args:
        playwright: Playwright driver object; its ``chromium`` launcher is
            used.

    Raises:
        ValueError: If the typed id is not an integer.
        KeyError: If the id is not one of the listed mini-programs.
    """
    global platform_name

    # Single source of truth for both the prompt and the lookup.
    platforms = {
        1: "票圈内容精选",
        2: "票圈 l 3亿人喜欢的视频平台",
        3: "票圈 l 视频精选",
        4: "票圈 l 祝福",
        5: "票圈 l 福年",
        6: "票圈 l 信仰之路",
        7: "票圈视频",
        8: "票圈短视频",
        9: "老好看视频",
        10: "票圈最惊奇",
        11: "票圈视频+",
    }
    prompt = (
        "请输入小程序id:\n"
        + json.dumps(platforms, ensure_ascii=False, indent=4)
        + "\n:"
    )
    # Resolve the choice before launching anything so a bad id fails
    # immediately instead of after the browser and login pages are open.
    platform_name = platforms[int(input(prompt))]

    # Start the browser.
    browser = await playwright.chromium.launch(
        headless=False,
        downloads_path="temp_file",
    )
    try:
        context = await browser.new_context(accept_downloads=True)
        try:
            page = await context.new_page()
            await page.set_viewport_size({"width": 1680, "height": 1080})
            page.on("download", handle_download)

            # Log in (requires scanning a QR code), then open the core-data page.
            await page.goto("https://wedata.weixin.qq.com/mp2/login")
            await page.goto("https://wedata.weixin.qq.com/mp2/?source=0")
            await page.goto(
                "https://wedata.weixin.qq.com/mp2/basic-data/core-data?source=0"
            )

            await get_info(page, platform_name)
        finally:
            # Close the browser context first, then the browser itself.
            await context.close()
    finally:
        await browser.close()
async def main():
    """Start Playwright, pass the driver to ``run``, and stop it afterwards."""
    manager = async_playwright()
    async with manager as driver:
        await run(driver)
# Start the scraper only when this file is executed as a script, so that
# importing the module does not open a browser or block on stdin.
if __name__ == "__main__":
    asyncio.run(main())
|