main_v2.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
"""
Created on Sun 7 Jan 2024
@author: luojunhui
"""
  5. import os
  6. import json
  7. import asyncio
  8. import datetime
  9. import pandas as pd
  10. from playwright.async_api import async_playwright
  11. from applications import check_account, Mysql
  12. from applications.config import platform_map
  13. from applications.functions import navigate
  14. from feishu.feishu import Feishu
  15. platform_name = None
  16. data_frame = []
  17. lock = asyncio.Lock()
  18. async def handle_download(download):
  19. global platform_name
  20. global data_frame
  21. async with lock:
  22. today = datetime.datetime.today().__str__().split(" ")[0].replace("-", "")
  23. temp = [platform_name]
  24. download_path = await download.path()
  25. # 指定新文件名
  26. new_file_path = r"temp_file\temp.xlsx"
  27. # 重命名文件
  28. if os.path.exists(new_file_path):
  29. os.remove(new_file_path)
  30. os.rename(download_path, new_file_path)
  31. if os.path.exists(download_path):
  32. os.remove(download_path)
  33. df = pd.read_excel(new_file_path)
  34. df = df.values.tolist()
  35. # 实验名称
  36. task_name = df[0][1]
  37. temp.append(task_name)
  38. # 数据指标
  39. task_index = df[5][1]
  40. temp.append(task_index)
  41. title_row = df[8]
  42. data_line = df[-1]
  43. temp += data_line
  44. w = {temp: data_line[index] for index, temp in enumerate(title_row)}
  45. print(platform_name, "\t", task_name, "\t", task_index)
  46. print(json.dumps(w, ensure_ascii=False, indent=4))
  47. while len(temp) < 29:
  48. temp.append("NULL")
  49. Mysql().insert_line(data=temp)
  50. data_frame.append(temp)
  51. os.remove(new_file_path)
  52. async def run(playwright):
  53. """
  54. 小程序:
  55. -1 票圈内容精选 ok
  56. -2 票圈 l 3亿人喜欢的视频平台 ok
  57. -3 票圈 l 视频精选
  58. -4 票圈 l 祝福
  59. -5 票圈 l 福年
  60. -6 票圈 l 信仰之路 ok
  61. -7 票圈视频 ok
  62. -8 票圈短视频
  63. -9 老好看视频 ok
  64. -10 票圈最惊奇 ok
  65. -11 票圈视频+
  66. """
  67. global platform_name
  68. global data_frame
  69. # 初始化浏览器
  70. browser = await playwright.chromium.launch(
  71. headless=False,
  72. downloads_path="temp_file",
  73. )
  74. context = await browser.new_context(accept_downloads=True)
  75. page = await context.new_page()
  76. await page.set_viewport_size({"width": 1680, "height": 1080})
  77. page.on("download", handle_download)
  78. # 登陆,需要扫码
  79. await page.goto("https://wedata.weixin.qq.com/mp2/login")
  80. await page.goto("https://wedata.weixin.qq.com/mp2/?source=0")
  81. await page.goto("https://wedata.weixin.qq.com/mp2/basic-data/core-data?source=0")
  82. done_dict = {}
  83. F = Feishu()
  84. platform_name = '票圈内容精选'
  85. await navigate(page, platform_name)
  86. done_dict[platform_name] = 1
  87. F.bot(platform_name)
  88. for platform_name in platform_map.keys():
  89. if done_dict.get(platform_name):
  90. print(f"{platform_name} has already been exported")
  91. else:
  92. await check_account(page, platform_name)
  93. try:
  94. await navigate(page, platform_name)
  95. done_dict[platform_name] = 1
  96. F.bot(platform_name)
  97. except Exception as e:
  98. F.bot(platform_name, 2)
  99. continue
  100. # 关闭浏览器上下文和浏览器
  101. await context.close()
  102. await browser.close()
  103. async def main():
  104. async with async_playwright() as playwright:
  105. await run(playwright)
  106. asyncio.run(main())