"""
@author: Luo Junhui (罗俊辉)
@file: toutiao_get_bogus.py
@time: 2024/1/4
@desc: Use a browser automation tool to capture Toutiao request parameters
       and store them in the database.
"""
import json
from typing import Dict

from playwright.sync_api import sync_playwright

from applications.db import DatabaseConnector
from config import long_articles_config


class ToutiaoBogus:
    """
    Capture the request parameters of Toutiao feed API calls.

    A browser page is driven with Playwright; every outgoing request whose
    URL contains ``FEED_URL_MARKER`` is written to the
    ``toutiao_request_params`` table.
    """

    # Substring identifying the feed-list API requests that should be recorded.
    FEED_URL_MARKER = "https://www.toutiao.com/api/pc/list/feed?"

    # Parameterized insert statement; values are bound by the DB connector.
    INSERT_SQL = """
        INSERT INTO toutiao_request_params
        (request_method, request_url, request_headers, post_data, category)
        VALUES
        (%s, %s, %s, %s, %s);
    """

    def __init__(self):
        """
        Create the database connector and open the connection.

        Attributes:
            db (DatabaseConnector): connector built from
                ``long_articles_config``; ``connect()`` is called immediately.
        """
        self.db = DatabaseConnector(db_config=long_articles_config)
        self.db.connect()

    def on_request(self, request, category) -> None:
        """
        Persist one intercepted request if it targets the feed API.

        Requests whose URL does not contain ``FEED_URL_MARKER`` are ignored.

        Args:
            request: the request object delivered by the page's "request"
                event; its ``method``, ``url``, ``headers`` and ``post_data``
                attributes are read.
            category: value stored in the ``category`` column
                (passed through unchanged).
        """
        if self.FEED_URL_MARKER not in request.url:
            return

        self.db.save(
            query=self.INSERT_SQL,
            params=(
                request.method,
                request.url,
                # Headers and body are stored as JSON text; ensure_ascii=False
                # keeps non-ASCII characters readable in the database.
                json.dumps(request.headers, ensure_ascii=False),
                json.dumps(request.post_data, ensure_ascii=False),
                category,
            ),
        )

    def crawler_recommend_article_list(self, category_info: Dict) -> None:
        """
        Open a category page, click its tab and record the feed requests.

        Args:
            category_info: mapping that must provide the keys ``'url'``
                (page to open), ``'name'`` (accessible name of the button to
                click) and ``'category'`` (value stored with each request).
        """
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=False)
            try:
                context = browser.new_context()
                page = context.new_page()
                page.goto(category_info['url'])
                page.wait_for_load_state("networkidle")

                # The listener is attached only after the initial load has
                # settled, so requests issued during that first load are not
                # recorded; only those sent from here on are.
                page.on(
                    "request",
                    lambda request: self.on_request(
                        request, category_info['category']
                    ),
                )

                page.get_by_role("button", name=category_info['name']).click()
                page.wait_for_load_state("networkidle")
                # Extra grace period so late feed requests are still captured.
                page.wait_for_timeout(5000)
            finally:
                # Close the browser even when navigation, the click or a wait
                # raises, so a failed run does not leave the browser open.
                browser.close()