# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/3/27
import os
import sys
from datetime import date, timedelta

import requests
from lxml import etree

# Make project-local packages (common.*) importable when run as a script.
# NOTE: this must run BEFORE the common.* imports below for it to matter.
sys.path.append(os.getcwd())

from common.common import Common
from common.scheduling_db import MysqlHelper

# Explicitly bypass any system-level HTTP(S) proxy for requests in this module.
proxies = {"http": None, "https": None}
class HotSearch:
    """Scrapes trending-search titles and writes them into the crawler_hot_title table."""

    # Today's date, formatted YYYY-MM-DD.
    today = date.today().strftime("%Y-%m-%d")

    # Baidu hot search
    @classmethod
    def baidu_hot_search(cls, log_type, crawler, env):
        """Fetch Baidu realtime hot-search titles and insert each one into MySQL.

        :param log_type: logger channel name passed to Common.logger
        :param crawler: crawler name passed to Common.logger / MysqlHelper
        :param env: environment flag forwarded to MysqlHelper.update_values
        """
        try:
            url = "https://top.baidu.com/board?tab=realtime"
            headers = {
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"
            }
            response = requests.get(url=url, headers=headers, proxies=proxies).text
            if not response:
                Common.logger(log_type, crawler).error(f"baidu_hot_search:{response}\n")
                return
            response_html = etree.HTML(response)
            title_list_html = response_html.xpath('//*[@class="c-single-text-ellipsis"]')
            for title_html in title_list_html:
                source = "百度"
                title = title_html.xpath("./text()")[0].strip()
                publish_time = cls.today
                # Escape backslashes and double quotes so a scraped title cannot
                # break out of the SQL string literal (previously a title with a
                # quote would corrupt the statement — SQL injection risk).
                # TODO(review): switch to parameterized queries if MysqlHelper supports them.
                safe_title = title.replace("\\", "\\\\").replace('"', '\\"')
                insert_sql = f"""insert into crawler_hot_title(source, title, publish_time)
                values("{source}", "{safe_title}", "{publish_time}")"""
                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
                Common.logger(log_type, crawler).info("写入数据库成功\n")
        except Exception as e:
            # Failure path: log at error level (was incorrectly logged as info).
            Common.logger(log_type, crawler).error(f"baidu_hot_search:{e}\n")
- if __name__ == "__main__":
- HotSearch.baidu_hot_search("hot-search", "weixinzhishu", "dec")