# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/9/25
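"""
Fetch the 看一看 (Kanyikan) mini-program session from Charles capture files in
./kanyikan/chlsfiles/, write it into the "$.token" field of the matching
crawler_config row, then clean up the capture files. Runs in a retry loop
until a session is found.
"""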
import argparse
import json
import os
import sys
import time

sys.path.append(os.getcwd())
from common.common import Common
from common.scheduling_db import MysqlHelper


class KanyikanSession:
    # Directory where Charles saves its exported capture files
    chls_file_path = "./kanyikan/chlsfiles/"

    @classmethod
    def get_session(cls, log_type, crawler, env):
        while True:
            all_files = os.listdir(cls.chls_file_path)
            # Keep only Charles capture files
            chls_files = [f for f in all_files if "charles" in os.path.splitext(f)[0]]
            if len(chls_files) < 1:
                Common.logger(log_type, crawler).info("no chlsfile found, retrying in 60s")
                Common.logging(log_type, crawler, env, "no chlsfile found, retrying in 60s")
                time.sleep(60)
                continue
            # Use the newest capture file (file names sort chronologically)
            chls_file = sorted(chls_files)[-1]
            Common.logger(log_type, crawler).info(f"chls_file:{chls_file}")
            chls_file_name = os.path.splitext(chls_file)[0]
            # Rename the capture file to a .txt suffix before reading it
            os.rename(os.path.join(cls.chls_file_path, chls_file),
                      os.path.join(cls.chls_file_path, f"{chls_file_name}.txt"))
            with open(os.path.join(cls.chls_file_path, f"{chls_file_name}.txt"), encoding='utf-8-sig',
                      errors='ignore') as f:
                contents = json.load(f, strict=False)
            # Keep only the requests sent to the 看一看 backend
            kanyikan_request_list = [c for c in contents if "search.weixin.qq.com" in c["host"]]
            if len(kanyikan_request_list) == 0:
                Common.logger(log_type, crawler).info("search.weixin.qq.com not found in chlsfile, retrying in 60s")
                Common.logging(log_type, crawler, env, "search.weixin.qq.com not found in chlsfile, retrying in 60s")
                time.sleep(60)
                continue
            for kanyikan_request in kanyikan_request_list:
                if kanyikan_request["path"] == "/cgi-bin/recwxa/recwxagetunreadmessagecnt":
                    Common.logger(log_type, crawler).info(f'query:{kanyikan_request["query"]}\n')
                    # The session value sits between "session=" and the next "&"
                    session = kanyikan_request["query"].split("session=")[-1].split("&")[0]
                    Common.logger(log_type, crawler).info(f"session:{session}\n")
                    # Defensive check: if another query parameter leaked into the
                    # value, cut it off at the first "&" again
                    if any(keyword in session for keyword in
                           ["vid", "offset", "wxaVersion", "limit", "scene", "count", "channelid", "subscene",
                            "clientVersion", "sharesearchid", "nettype", "switchprofile", "switchnewuser"]):
                        session = session.split("&")[0]
                    # Return the session of the first matching request
                    return session
            # No request matched the target path: wait, then scan the next capture
            Common.logger(log_type, crawler).info("target request not found in chlsfile, retrying in 60s")
            time.sleep(60)
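
    # A minimal sketch of one chlsfile record this parser assumes (Charles'
    # JSON session export); only "host", "path" and "query" are read above.
    # Values are hypothetical placeholders, not real captured data:
    # {
    #     "host": "search.weixin.qq.com",
    #     "path": "/cgi-bin/recwxa/recwxagetunreadmessagecnt",
    #     "query": "session=XXXX&vid=XXXX&wxaVersion=XXXX&offset=0"
    # }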

    @classmethod
    def del_chls_file(cls, log_type, crawler):
        # Remove every cached capture file once the session has been saved
        all_file = sorted(os.listdir(cls.chls_file_path))
        for file in all_file:
            os.remove(os.path.join(cls.chls_file_path, file))
        Common.logger(log_type, crawler).info("charles cache files deleted\n")

    @classmethod
    def save_session(cls, log_type, crawler, env, kanyikan_type):
        session = cls.get_session(log_type, crawler, env)
        Common.logger(log_type, crawler).info(session)
        # Map each crawler type to the title of its crawler_config row
        # (the titles are the Chinese names stored in the database)
        title_map = {
            "kyk": "看一看推荐",
            "kykjk": "看一看健康",
            "kykln": "看一看老年",
        }
        title = title_map.get(kanyikan_type)
        if title is not None:
            # Write the new session into the "$.token" field of the JSON config column
            update_sql = f""" UPDATE crawler_config SET config=JSON_SET(config, "$.token", "{session}"), update_time={int(time.time() * 1000)} WHERE title="{title}"; """
            MysqlHelper.update_values(log_type, crawler, update_sql, env, action="")
            Common.logger(log_type, crawler).info("session saved to database")
        cls.del_chls_file(log_type, crawler)
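
    # For reference, JSON_SET replaces only the "$.token" key and leaves the
    # rest of the config JSON untouched; with hypothetical values:
    #   before: {"token": "old-session", "other_key": "unchanged"}
    #   after:  {"token": "<new session>", "other_key": "unchanged"}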

    @classmethod
    def main(cls, log_type, crawler, env, kanyikan_type):
        Common.logger(log_type, crawler).info('start crawling: 看一看 session\n')
        Common.logging(log_type, crawler, env, 'start crawling: 看一看 session\n')
        cls.save_session(log_type, crawler, env, kanyikan_type)
        Common.del_logs(log_type, crawler)
        Common.logger(log_type, crawler).info('crawl round finished\n')
        Common.logging(log_type, crawler, env, 'crawl round finished\n')


if __name__ == "__main__":
    parser = argparse.ArgumentParser()  # build the command-line argument parser
    parser.add_argument('--log_type', type=str)  # log type
    parser.add_argument('--crawler')  # crawler name
    parser.add_argument('--kanyikan_type')  # one of: kyk / kykjk / kykln
    parser.add_argument('--env')  # runtime environment
    args = parser.parse_args()  # values can also be supplied from the terminal
    KanyikanSession.main(log_type=args.log_type,
                         crawler=args.crawler,
                         kanyikan_type=args.kanyikan_type,
                         env=args.env)
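
# Example invocation; the module path and argument values below are illustrative
# assumptions, not taken from the repo's actual run scripts:
#   python3 kanyikan/kanyikan_session.py --log_type="session" --crawler="kanyikan" \
#       --kanyikan_type="kyk" --env="dev"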