# weixinzhishu_hot_search.py
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2023/3/27
  4. import os
  5. import sys
  6. from datetime import date, timedelta
  7. from lxml import etree
  8. import requests
  9. from common.common import Common
  10. sys.path.append(os.getcwd())
  11. from common.scheduling_db import MysqlHelper
  12. proxies = {"http": None, "https": None}
  13. class HotSearch:
  14. # 日期,格式 年-月-日
  15. today = (date.today() + timedelta(days=0)).strftime("%Y-%m-%d")
  16. # 百度热搜
  17. @classmethod
  18. def baidu_hot_search(cls, log_type, crawler, env):
  19. try:
  20. url = "https://top.baidu.com/board?tab=realtime"
  21. headers = {
  22. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"
  23. }
  24. response = requests.get(url=url, headers=headers, proxies=proxies).text
  25. if len(response) == 0:
  26. Common.logger(log_type, crawler).error(f"baidu_hot_search:{response}\n")
  27. return
  28. response_html = etree.HTML(response)
  29. title_list_html = response_html.xpath('//*[@class="c-single-text-ellipsis"]')
  30. for title_html in title_list_html:
  31. source = "百度"
  32. title = title_html.xpath("./text()")[0].strip()
  33. publish_time = cls.today
  34. insert_sql = f"""insert into crawler_hot_title(source, title, publish_time)
  35. values("{source}", "{title}", "{publish_time}")"""
  36. Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
  37. MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
  38. Common.logger(log_type, crawler).info("写入数据库成功\n")
  39. except Exception as e:
  40. Common.logger(log_type, crawler).info(f"baidu_hot_search:{e}\n")
# Manual entry point: run one Baidu hot-search scrape for the
# weixinzhishu crawler.
# NOTE(review): env "dec" looks like a typo for "dev" — confirm against the
# environments MysqlHelper actually recognizes.
if __name__ == "__main__":
    HotSearch.baidu_hot_search("hot-search", "weixinzhishu", "dec")