# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/3/27
"""Scrape Baidu realtime hot-search titles and persist them to MySQL."""
import os
import sys
from datetime import date

from lxml import etree
import requests

# Make project-local packages importable before importing them.
sys.path.append(os.getcwd())
from common.common import Common
from common.scheduling_db import MysqlHelper

# Explicitly disable any system-level HTTP(S) proxy for outgoing requests.
proxies = {"http": None, "https": None}


class HotSearch:
    # Today's date as YYYY-MM-DD; used as publish_time for every scraped title.
    today = date.today().strftime("%Y-%m-%d")

    # Baidu hot search
    @classmethod
    def baidu_hot_search(cls, log_type, crawler, env):
        """Fetch Baidu's realtime hot-search board and insert each title
        into the crawler_hot_title table.

        :param log_type: logger channel name passed to Common.logger
        :param crawler: crawler/topic name used by the logger and DB helper
        :param env: deployment environment forwarded to MysqlHelper
        :return: None; errors are logged, never raised to the caller
        """
        try:
            url = "https://top.baidu.com/board?tab=realtime"
            headers = {
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"
            }
            response = requests.get(url=url, headers=headers, proxies=proxies).text
            if not response:
                Common.logger(log_type, crawler).error(f"baidu_hot_search:{response}\n")
                return
            response_html = etree.HTML(response)
            title_list_html = response_html.xpath('//*[@class="c-single-text-ellipsis"]')
            for title_html in title_list_html:
                source = "百度"
                # Escape double quotes so an untrusted scraped title cannot
                # break out of the generated SQL string.
                # TODO(review): prefer parameterized queries if
                # MysqlHelper.update_values supports placeholders.
                title = title_html.xpath("./text()")[0].strip().replace('"', '\\"')
                publish_time = cls.today
                insert_sql = f"""insert into crawler_hot_title(source, title, publish_time) values("{source}", "{title}", "{publish_time}")"""
                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
                Common.logger(log_type, crawler).info("写入数据库成功\n")
        except Exception as e:
            # A failed scrape/insert is a fault, not routine info — log as error.
            Common.logger(log_type, crawler).error(f"baidu_hot_search:{e}\n")


if __name__ == "__main__":
    # NOTE(review): env "dec" looks like a possible typo for "dev" — confirm
    # against the values MysqlHelper expects before changing.
    HotSearch.baidu_hot_search("hot-search", "weixinzhishu", "dec")