main.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. import os
  2. import json
  3. import asyncio
  4. import datetime
  5. import pandas as pd
  6. from playwright.async_api import async_playwright
  7. from applications import check_account, get_info, Mysql
  8. platform_name = None
  9. data_frame = []
  10. lock = asyncio.Lock()
  11. async def handle_download(download):
  12. global platform_name
  13. global data_frame
  14. async with lock:
  15. today = datetime.datetime.today().__str__().split(" ")[0].replace("-", "")
  16. temp = [platform_name]
  17. download_path = await download.path()
  18. # 指定新文件名
  19. new_file_path = r"temp_file\temp.xlsx"
  20. # 重命名文件
  21. if os.path.exists(new_file_path):
  22. os.remove(new_file_path)
  23. os.rename(download_path, new_file_path)
  24. if os.path.exists(download_path):
  25. os.remove(download_path)
  26. df = pd.read_excel(new_file_path)
  27. df = df.values.tolist()
  28. # 实验名称
  29. task_name = df[0][1]
  30. temp.append(task_name)
  31. # 数据指标
  32. task_index = df[5][1]
  33. temp.append(task_index)
  34. title_row = df[8]
  35. data_line = df[-1]
  36. # if data_line[0] != today:
  37. # print("Not today")
  38. # else:
  39. temp += data_line
  40. w = {temp: data_line[index] for index, temp in enumerate(title_row)}
  41. print(platform_name, "\t", task_name, "\t", task_index)
  42. print(json.dumps(w, ensure_ascii=False, indent=4))
  43. while len(temp) < 29:
  44. temp.append("NULL")
  45. Mysql().insert_line(data=temp)
  46. data_frame.append(temp)
  47. os.remove(new_file_path)
  48. async def run(playwright):
  49. """
  50. 小程序:
  51. -1 票圈内容精选 ok
  52. -2 票圈 l 3亿人喜欢的视频平台 ok
  53. -3 票圈 l 视频精选
  54. -4 票圈 l 祝福
  55. -5 票圈 l 福年
  56. -6 票圈 l 信仰之路 ok
  57. -7 票圈视频 ok
  58. -8 票圈短视频
  59. -9 老好看视频 ok
  60. -10 票圈最惊奇 ok
  61. -11 票圈视频+
  62. """
  63. global platform_name
  64. global data_frame
  65. # 初始化浏览器
  66. browser = await playwright.chromium.launch(
  67. headless=False,
  68. downloads_path="temp_file",
  69. )
  70. context = await browser.new_context(accept_downloads=True)
  71. page = await context.new_page()
  72. await page.set_viewport_size({"width": 1680, "height": 1080})
  73. page.on("download", handle_download)
  74. # 登陆,需要扫码
  75. await page.goto("https://wedata.weixin.qq.com/mp2/login")
  76. await page.goto("https://wedata.weixin.qq.com/mp2/?source=0")
  77. await page.goto("https://wedata.weixin.qq.com/mp2/basic-data/core-data?source=0")
  78. # # 默认先进入票圈内容精选
  79. platform_name = '票圈内容精选'
  80. await get_info(page, platform_name)
  81. # 票圈 | 3亿人喜欢的视频平台
  82. platform_name = "票圈 l 3亿人喜欢的视频平台"
  83. await check_account(page, platform_name)
  84. await asyncio.sleep(3)
  85. await get_info(page, platform_name)
  86. # 票圈 | 视频精选
  87. platform_name = "票圈 l 视频精选"
  88. await check_account(page, platform_name)
  89. await get_info(page, platform_name)
  90. # 票圈 l 祝福
  91. platform_name = "票圈 l 祝福"
  92. await check_account(page, platform_name)
  93. await get_info(page, platform_name)
  94. # 票圈 l 福年 1
  95. platform_name = "票圈 I 福年"
  96. await check_account(page, platform_name)
  97. await get_info(page, platform_name)
  98. # 票圈 l 信仰之路 1
  99. platform_name = "票圈 l 信仰之路"
  100. await check_account(page, platform_name)
  101. await get_info(page, platform_name)
  102. # 票圈视频
  103. platform_name = "票圈视频"
  104. await check_account(page, platform_name)
  105. await get_info(page, platform_name)
  106. # 票圈短视频 1
  107. platform_name = "票圈短视频"
  108. await check_account(page, platform_name)
  109. await get_info(page, platform_name)
  110. # 老好看视频
  111. platform_name = "老好看视频"
  112. await check_account(page, platform_name)
  113. await get_info(page, platform_name)
  114. # 票圈最惊奇
  115. platform_name = "票圈最惊奇"
  116. await check_account(page, platform_name)
  117. await get_info(page, platform_name)
  118. # 票圈视频+
  119. platform_name = "票圈视频+"
  120. await check_account(page, platform_name)
  121. await get_info(page, platform_name)
  122. # columns = ["小程序", "实验", "指标", "日期", "对照组"] + [
  123. # "实验组{}".format(i) for i in range(1, 25)
  124. # ]
  125. # print(len(columns))
  126. # out_df = DataFrame(data_frame, columns=columns)
  127. # out_df.to_excel("{}.xlsx".format("2024-01-02-result"), index=False)
  128. # await asyncio.sleep(100)
  129. # 关闭浏览器上下文和浏览器
  130. await context.close()
  131. await browser.close()
  132. async def main():
  133. async with async_playwright() as playwright:
  134. await run(playwright)
  135. asyncio.run(main())