@@ -6,12 +6,9 @@ import sys
 from datetime import date, timedelta
 from lxml import etree
 import requests
-
-from common.common import Common
-
 sys.path.append(os.getcwd())
 from common.scheduling_db import MysqlHelper
-
+from common.common import Common
 proxies = {"http": None, "https": None}
 
 
@@ -38,13 +35,172 @@ class HotSearch:
                 title = title_html.xpath("./text()")[0].strip()
                 publish_time = cls.today
                 insert_sql = f"""insert into crawler_hot_title(source, title, publish_time)
-                    values("{source}", "{title}", "{publish_time}")"""
+                    values("{source}", '{title}', "{publish_time}")"""
                 Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                 MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
                 Common.logger(log_type, crawler).info("写入数据库成功\n")
         except Exception as e:
             Common.logger(log_type, crawler).info(f"baidu_hot_search:{e}\n")
 
+    # Douyin hot search
+    @classmethod
+    def douyin_hot_search(cls, log_type, crawler, env):
+        try:
+            url = "https://tophub.today/n/K7GdaMgdQy"
+            headers = {
+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"
+            }
+            response = requests.get(url=url, headers=headers, proxies=proxies).text
+            if len(response) == 0:
+                Common.logger(log_type, crawler).error(f"douyin_hot_search:{response}\n")
+                return
+            response_html = etree.HTML(response)
+            tr_list = response_html.xpath("//tr")
+            for tr in tr_list:
+                source = "抖音"
+
+                publish_day = tr.xpath("./td[1]/text()")[0]
+                if len(publish_day) < 10:
+                    publish_day = cls.today
+                elif publish_day != cls.today:
+                    pass
+                publish_time = publish_day
+                if publish_time != cls.today:
+                    continue
+
+                title = tr.xpath("./td[2]/*[1]/text()")[0].strip()
+
+                insert_sql = f"""insert into crawler_hot_title(source, title, publish_time)
+                    values("{source}", '{title}', "{publish_time}")"""
+                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+                MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
+                Common.logger(log_type, crawler).info("写入数据库成功\n")
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"douyin_hot_search:{e}\n")
+
+    # Kuaishou hot search
+    @classmethod
+    def kuaishou_hot_search(cls, log_type, crawler, env):
+        try:
+            url = "https://tophub.today/n/MZd7PrPerO"
+            headers = {
+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"
+            }
+            response = requests.get(url=url, headers=headers, proxies=proxies).text
+            if len(response) == 0:
+                Common.logger(log_type, crawler).error(f"kuaishou_hot_search:{response}\n")
+                return
+            response_html = etree.HTML(response)
+            tr_list = response_html.xpath("//tr")
+            for tr in tr_list:
+                source = "快手"
+
+                publish_day = tr.xpath("./td[1]/text()")[0]
+                if len(publish_day) < 10:
+                    publish_day = cls.today
+                elif publish_day != cls.today:
+                    pass
+                publish_time = publish_day
+                if publish_time != cls.today:
+                    continue
+
+                title = tr.xpath("./td[2]/*[1]/text()")[0].strip()
+                # print(source)
+                # print(publish_time)
+                # print(title)
+                # print('\n')
+
+                insert_sql = f"""insert into crawler_hot_title(source, title, publish_time)
+                    values("{source}", '{title}', "{publish_time}")"""
+                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+                MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
+                Common.logger(log_type, crawler).info("写入数据库成功\n")
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"kuaishou_hot_search:{e}\n")
+
+    # Weibo hot search
+    @classmethod
+    def weibo_hot_search(cls, log_type, crawler, env):
+        try:
+            url = "https://tophub.today/n/KqndgxeLl9"
+            headers = {
+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"
+            }
+            response = requests.get(url=url, headers=headers, proxies=proxies).text
+            if len(response) == 0:
+                Common.logger(log_type, crawler).error(f"weibo_hot_search:{response}\n")
+                return
+            response_html = etree.HTML(response)
+            tr_list = response_html.xpath("//tr")
+            for tr in tr_list:
+                source = "微博"
+
+                publish_day = tr.xpath("./td[1]/text()")[0]
+                if len(publish_day) < 10:
+                    publish_day = cls.today
+                elif publish_day != cls.today:
+                    pass
+                publish_time = publish_day
+                if publish_time != cls.today:
+                    continue
+
+                title = tr.xpath("./td[2]/*[1]/text()")[0].strip()
+                # print(source)
+                # print(publish_time)
+                # print(title)
+
+                insert_sql = f"""insert into crawler_hot_title(source, title, publish_time)
+                    values("{source}", '{title}', "{publish_time}")"""
+                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+                MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
+                Common.logger(log_type, crawler).info("写入数据库成功\n")
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"weibo_hot_search:{e}\n")
+
+    # WeChat hot search
+    @classmethod
+    def weixin_hot_search(cls, log_type, crawler, env):
+        try:
+            url = "https://tophub.today/n/W1VdJPZoLQ"
+            headers = {
+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"
+            }
+            response = requests.get(url=url, headers=headers, proxies=proxies).text
+            if len(response) == 0:
+                Common.logger(log_type, crawler).error(f"weixin_hot_search:{response}\n")
+                return
+            response_html = etree.HTML(response)
+            tr_list = response_html.xpath("//tr")
+            for tr in tr_list:
+                source = "微信"
+
+                publish_day = tr.xpath("./td[1]/text()")[0]
+                if len(publish_day) < 10:
+                    publish_day = cls.today
+                elif publish_day != cls.today:
+                    pass
+                publish_time = publish_day
+                if publish_time != cls.today:
+                    continue
+
+                title = tr.xpath("./td[2]/*[1]/text()")[0].strip()
+                # print(source)
+                # print(publish_time)
+                # print(title)
+
+                insert_sql = f"""insert into crawler_hot_title(source, title, publish_time)
+                    values("{source}", '{title}', "{publish_time}")"""
+                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+                MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
+                Common.logger(log_type, crawler).info("写入数据库成功\n")
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"weixin_hot_search:{e}\n")
+
 
 if __name__ == "__main__":
-    HotSearch.baidu_hot_search("hot-search", "weixinzhishu", "dec")
+    # HotSearch.baidu_hot_search("hot-search-baidu", "weixinzhishu", "dev")
+    # HotSearch.douyin_hot_search("hot-search-douyin", "weixinzhishu", "dev")
+    # HotSearch.kuaishou_hot_search("hot-search-kuaishou", "weixinzhishu", "dev")
+    # HotSearch.weibo_hot_search("hot-search-weibo", "weixinzhishu", "dev")
+    HotSearch.weixin_hot_search("hot-search-weixin", "weixinzhishu", "dev")
+
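
Note on the quoting change in the insert statements: switching the title placeholder from "{title}" to '{title}' only avoids breakage until a title itself contains a single quote. A parameterized insert lets the driver handle quoting entirely. The sketch below is illustrative only; it assumes a plain pymysql connection rather than the project's MysqlHelper, whose update_values call (as used above) takes an already-formatted SQL string.

    import pymysql

    def insert_hot_title(conn, source, title, publish_time):
        # %s placeholders are filled by the driver, so quotes in the title need no escaping
        sql = ("insert into crawler_hot_title(source, title, publish_time) "
               "values (%s, %s, %s)")
        with conn.cursor() as cursor:
            cursor.execute(sql, (source, title, publish_time))
        conn.commit()

    # Usage (hypothetical connection parameters):
    # conn = pymysql.connect(host="127.0.0.1", user="crawler", password="...", database="crawler")
    # insert_hot_title(conn, "微信", "某条热搜标题", "2023-03-22")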