							- """
 
- @author: 罗俊辉
 
- @file: toutiao_get_bogus.py
 
- @time: 2024/1/4
 
- @desc: 用浏览器工具获取头条参数,并且存储到数据库中
 
- """
 
- import json
 
- from typing import Dict
 
- from playwright.sync_api import sync_playwright
 
- from applications.db import DatabaseConnector
 
- from config import long_articles_config
 

class ToutiaoBogus:
    """
    Capture Toutiao request parameters
    """

    def __init__(self):
        """
        Initialize a ToutiaoBogus instance.

        Creates a DatabaseConnector instance and calls its connect method to
        establish the connection to the database.

        Attributes:
            db (DatabaseConnector): DatabaseConnector instance used to interact with the database.
        """
        # Create a DatabaseConnector instance for interacting with the database
        self.db = DatabaseConnector(db_config=long_articles_config)
        # Call the connector's connect method to establish the database connection
        self.db.connect()
 
    def on_request(self, request, category):
        # Persist the parameters of Toutiao feed-API requests under the given category
        if "https://www.toutiao.com/api/pc/list/feed?" in request.url:
            # request_info = {
            #     'method': request.method,
            #     'url': request.url,
            #     'headers': request.headers if request.headers else {},
            #     'postData': request.post_data if request.post_data else {}
            # }
            insert_sql = f"""
                INSERT INTO toutiao_request_params
                (request_method, request_url, request_headers, post_data, category)
                VALUES
                (%s, %s, %s, %s, %s);
            """
            self.db.save(
                query=insert_sql,
                params=(
                    request.method,
                    request.url,
                    json.dumps(request.headers, ensure_ascii=False),
                    json.dumps(request.post_data, ensure_ascii=False),
                    category
                )
            )
 
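    # Sketch of the table assumed by the INSERT above. The column names come
    # from the query itself; the column types here are guesses for illustration,
    # not the actual production schema:
    #
    #   CREATE TABLE toutiao_request_params (
    #       id INT AUTO_INCREMENT PRIMARY KEY,
    #       request_method VARCHAR(16),
    #       request_url TEXT,
    #       request_headers TEXT,
    #       post_data TEXT,
    #       category VARCHAR(64)
    #   );
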
    def crawler_recommend_article_list(self, category_info: Dict):
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=False)
            context = browser.new_context()
            page = context.new_page()
            page.goto(category_info['url'])
            page.wait_for_load_state("networkidle")
            # Listen for request events
            page.on("request", lambda request: self.on_request(request, category_info['category']))
            page.get_by_role("button", name=category_info['name']).click()
            page.wait_for_load_state("networkidle")
            page.wait_for_timeout(5000)
            browser.close()
 
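
# Minimal usage sketch: the category_info keys (url / name / category) follow
# crawler_recommend_article_list above; the concrete values are hypothetical
# examples, not configuration taken from this repository.
if __name__ == "__main__":
    task = ToutiaoBogus()
    demo_category = {
        "url": "https://www.toutiao.com/",  # page to open
        "name": "推荐",                      # label of the category button to click
        "category": "recommend",            # tag stored alongside the captured request
    }
    task.crawler_recommend_article_list(demo_category)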
 