# main.py (4.5 KB)
import asyncio
import json
import os

import pandas as pd
from pandas import DataFrame
from playwright.async_api import async_playwright

from functions import check_account, get_info, Mysql
  8. platform_name = None
  9. data_frame = []
  10. lock = asyncio.Lock()
  11. async def handle_download(download):
  12. global platform_name
  13. global data_frame
  14. async with lock:
  15. temp = [platform_name]
  16. download_path = await download.path()
  17. # 指定新文件名
  18. new_file_path = r"temp_file\temp.xlsx"
  19. # 重命名文件
  20. os.rename(download_path, new_file_path)
  21. df = pd.read_excel(new_file_path)
  22. df = df.values.tolist()
  23. # 实验名称
  24. task_name = df[0][1]
  25. temp.append(task_name)
  26. # 数据指标
  27. task_index = df[5][1]
  28. temp.append(task_index)
  29. title_row = df[8]
  30. data_line = df[-1]
  31. temp += data_line
  32. w = {temp: data_line[index] for index, temp in enumerate(title_row)}
  33. print(platform_name, "\t", task_name, "\t", task_index)
  34. print(json.dumps(w, ensure_ascii=False, indent=4))
  35. while len(temp) < 29:
  36. temp.append("NULL")
  37. Mysql().insert_line(data=temp)
  38. data_frame.append(temp)
  39. os.remove(new_file_path)
  40. async def run(playwright):
  41. """
  42. 小程序:
  43. -1 票圈内容精选 ok
  44. -2 票圈 l 3亿人喜欢的视频平台 ok
  45. -3 票圈 l 视频精选
  46. -4 票圈 l 祝福
  47. -5 票圈 l 福年
  48. -6 票圈 l 信仰之路 ok
  49. -7 票圈视频 ok
  50. -8 票圈短视频
  51. -9 老好看视频 ok
  52. -10 票圈最惊奇 ok
  53. -11 票圈视频+
  54. """
  55. global platform_name
  56. global data_frame
  57. # 初始化浏览器
  58. browser = await playwright.chromium.launch(
  59. headless=False,
  60. downloads_path="temp_file",
  61. )
  62. context = await browser.new_context(accept_downloads=True)
  63. page = await context.new_page()
  64. await page.set_viewport_size({"width": 1680, "height": 1080})
  65. page.on("download", handle_download)
  66. # 登陆,需要扫码
  67. await page.goto("https://wedata.weixin.qq.com/mp2/login")
  68. await page.goto("https://wedata.weixin.qq.com/mp2/?source=0")
  69. await page.goto("https://wedata.weixin.qq.com/mp2/basic-data/core-data?source=0")
  70. # 默认先进入票圈内容精选
  71. platform_name = '票圈内容精选'
  72. await get_info(page, platform_name)
  73. # # 票圈 | 3亿人喜欢的视频平台
  74. # platform_name = "票圈 l 3亿人喜欢的视频平台"
  75. # await check_account(page, platform_name)
  76. # await asyncio.sleep(3)
  77. # await get_info(page, platform_name)
  78. # 票圈 | 视频精选
  79. # platform_name = "票圈 l 视频精选"
  80. # await check_account(page, platform_name)
  81. # await get_info(page, platform_name)
  82. # 票圈 l 祝福
  83. # platform_name = "票圈 l 祝福"
  84. # await check_account(page, platform_name)
  85. # await get_info(page, platform_name)
  86. # 票圈 l 福年 1
  87. # platform_name = "票圈 I 福年"
  88. # await check_account(page, platform_name)
  89. # await get_info(page, platform_name)
  90. # 票圈 l 信仰之路 1
  91. # platform_name = "票圈 l 信仰之路"
  92. # await check_account(page, platform_name)
  93. # await get_info(page, platform_name)
  94. # 票圈视频
  95. # platform_name = "票圈视频"
  96. # await check_account(page, platform_name)
  97. # await get_info(page, platform_name)
  98. # 票圈短视频 1
  99. # platform_name = "票圈短视频"
  100. # await check_account(page, platform_name)
  101. # await get_info(page, platform_name)
  102. # 老好看视频
  103. # platform_name = "老好看视频"
  104. # await check_account(page, platform_name)
  105. # await get_info(page, platform_name)
  106. # 票圈最惊奇
  107. # platform_name = "票圈最惊奇"
  108. # await check_account(page, platform_name)
  109. # await get_info(page, platform_name)
  110. # 票圈视频+
  111. platform_name = "票圈视频+"
  112. await check_account(page, platform_name)
  113. await get_info(page, platform_name)
  114. columns = ["小程序", "实验", "指标", "日期", "对照组"] + [
  115. "实验组{}".format(i) for i in range(1, 25)
  116. ]
  117. print(len(columns))
  118. out_df = DataFrame(data_frame, columns=columns)
  119. out_df.to_excel("{}.xlsx".format("2023-12-29-result"), index=False)
  120. # await asyncio.sleep(100)
  121. # 关闭浏览器上下文和浏览器
  122. await context.close()
  123. await browser.close()
  124. async def main():
  125. async with async_playwright() as playwright:
  126. await run(playwright)
  127. asyncio.run(main())