kanyikan_session.py
# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/9/25
import argparse
import json
import os
import sys
import time

sys.path.append(os.getcwd())
from common.common import Common
from common.scheduling_db import MysqlHelper
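
# Workflow: read the newest Charles capture (chlsfile) exported as JSON, pull the
# Kanyikan session out of the recwxagetunreadmessagecnt request, write it into
# crawler_config, then clear the capture directory.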

class KanyikanSession:
    # Directory where Charles exports its chlsfile captures
    chls_file_path = "./kanyikan/chlsfiles/"

    @classmethod
    def get_session(cls, log_type, crawler, env):
        while True:
            all_files = os.listdir(cls.chls_file_path)
            chls_files = []
            for chls_file in all_files:
                if "charles" in os.path.splitext(chls_file)[0]:
                    chls_files.append(chls_file)
            if len(chls_files) < 1:
                Common.logger(log_type, crawler).info("No chlsfile found, retrying in 60s")
                Common.logging(log_type, crawler, env, "No chlsfile found, retrying in 60s")
                time.sleep(60)
                continue
            # Process the most recent capture file
            chls_file = sorted(chls_files)[-1]
            Common.logger(log_type, crawler).info(f"chls_file:{chls_file}")
            chls_file_name = os.path.splitext(chls_file)[0]
            # Rename the file to a .txt suffix
            os.rename(os.path.join(cls.chls_file_path, chls_file),
                      os.path.join(cls.chls_file_path, f"{chls_file_name}.txt"))
            with open(os.path.join(cls.chls_file_path, f"{chls_file_name}.txt"), encoding='utf-8-sig',
                      errors='ignore') as f:
                contents = json.load(f, strict=False)
            kanyikan_request_list = []
            for content in contents:
                if "search.weixin.qq.com" in content["host"]:
                    kanyikan_request_list.append(content)
            if len(kanyikan_request_list) == 0:
                Common.logger(log_type, crawler).info("search.weixin.qq.com not found in chlsfile, retrying in 60s")
                Common.logging(log_type, crawler, env, "search.weixin.qq.com not found in chlsfile, retrying in 60s")
                time.sleep(60)
                continue
            for kanyikan_request in kanyikan_request_list:
                if kanyikan_request["path"] == "/cgi-bin/recwxa/recwxagetunreadmessagecnt":
                    Common.logger(log_type, crawler).info(f'query:{kanyikan_request["query"]}\n')
                    sessions = kanyikan_request["query"].split("session=")[-1]
                    Common.logger(log_type, crawler).info(f"sessions:{sessions}\n")
                    # If other query parameters follow the session value, cut it off at the first "&"
                    if any(keyword in sessions for keyword in
                           ["vid", "offset", "wxaVersion", "limit", "scene", "count", "channelid", "subscene",
                            "clientVersion", "sharesearchid", "nettype", "switchprofile", "switchnewuser"]):
                        session = sessions.split("&")[0]
                        return session
                    return sessions
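
    # A minimal sketch of the chlsfile entry shape get_session assumes (Charles's
    # JSON export; field names as read above, values illustrative):
    #     {
    #         "host": "search.weixin.qq.com",
    #         "path": "/cgi-bin/recwxa/recwxagetunreadmessagecnt",
    #         "query": "session=<session>&vid=<vid>&wxaVersion=<version>"
    #     }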

    @classmethod
    def del_chls_file(cls, log_type, crawler):
        all_file = sorted(os.listdir(cls.chls_file_path))
        for file in all_file:
            os.remove(os.path.join(cls.chls_file_path, file))
        Common.logger(log_type, crawler).info("Charles cache files deleted\n")

    @classmethod
    def save_session(cls, log_type, crawler, env, kanyikan_type):
        session = cls.get_session(log_type, crawler, env)
        Common.logger(log_type, crawler).info(session)
        # Map each kanyikan_type to its crawler_config title (titles are stored DB values, kept as-is)
        titles = {"kyk": "看一看推荐", "kykjk": "看一看健康", "kykln": "看一看老年"}
        title = titles.get(kanyikan_type)
        if title is not None:
            update_sql = f""" UPDATE crawler_config SET config = JSON_SET(config, "$.token", "{session}"), update_time={int(time.time() * 1000)} WHERE title="{title}"; """
            MysqlHelper.update_values(log_type, crawler, update_sql, env, action="")
        Common.logger(log_type, crawler).info("session updated in database")
        cls.del_chls_file(log_type, crawler)
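
    # For example, with kanyikan_type="kyk" the generated SQL looks like
    # (session value and timestamp illustrative):
    #     UPDATE crawler_config
    #     SET config = JSON_SET(config, "$.token", "<session>"), update_time=1695600000000
    #     WHERE title="看一看推荐";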

    @classmethod
    def main(cls, log_type, crawler, env, kanyikan_type):
        Common.logger(log_type, crawler).info('Start crawling: Kanyikan session\n')
        Common.logging(log_type, crawler, env, 'Start crawling: Kanyikan session\n')
        cls.save_session(log_type, crawler, env, kanyikan_type)
        Common.del_logs(log_type, crawler)
        Common.logger(log_type, crawler).info('Crawl round finished\n')
        Common.logging(log_type, crawler, env, 'Crawl round finished\n')

if __name__ == "__main__":
    parser = argparse.ArgumentParser()  # Build the command-line parser
    parser.add_argument('--log_type', type=str)  # Log type
    parser.add_argument('--crawler')  # Crawler name
    parser.add_argument('--kanyikan_type')  # Which Kanyikan channel to update: kyk / kykjk / kykln
    parser.add_argument('--env')  # Runtime environment
    args = parser.parse_args()  # Values can be supplied from the terminal
    KanyikanSession.main(log_type=args.log_type,
                         crawler=args.crawler,
                         kanyikan_type=args.kanyikan_type,
                         env=args.env)
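
# Example invocation (argument values are illustrative):
#     python kanyikan_session.py --log_type session --crawler kanyikan --kanyikan_type kyk --env dev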