2 years ago · 3c8c61454b
--- a/weixinzhishu/weixinzhishu_hot_search/weixinzhishu/.DS_Store
+++ b/weixinzhishu/weixinzhishu_hot_search/weixinzhishu/.DS_Store
--- a/weixinzhishu/weixinzhishu_hot_search/weixinzhishu_hot_search.py
+++ b/weixinzhishu/weixinzhishu_hot_search/weixinzhishu_hot_search.py
@@ -6,12 +6,9 @@ import sys
 
				 from datetime import date, timedelta
			
 
				 from lxml import etree
			
 
				 import requests
			
 
				-
			
 
				-from common.common import Common
			
 
				-
			
 
				 sys.path.append(os.getcwd())
			
 
				 from common.scheduling_db import MysqlHelper
			
 
				-
			
 
				+from common.common import Common
			
 
				 proxies = {"http": None, "https": None}
			
 
				 
			
 
				 
			
@@ -38,13 +35,172 @@ class HotSearch:
 
				                 title = title_html.xpath("./text()")[0].strip()
			
 
				                 publish_time = cls.today
			
 
				                 insert_sql = f"""insert into crawler_hot_title(source, title, publish_time)
			
 
				-                values("{source}", "{title}", "{publish_time}")"""
			
 
				+                values("{source}", '{title}', "{publish_time}")"""
			
 
				                 Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
			
 
				                 MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
			
 
				                 Common.logger(log_type, crawler).info("写入数据库成功\n")
			
 
				         except Exception as e:
			
 
				             Common.logger(log_type, crawler).info(f"baidu_hot_search:{e}\n")
			
 
				 
			
 
				+    # Douyin hot search
			
 
				+    @classmethod
			
 
				+    def douyin_hot_search(cls, log_type, crawler, env):
			
 
				+        """Scrape today's Douyin hot-search titles from tophub.today into crawler_hot_title.
			
 
				+
			
 
				+        :param log_type: log channel name handed to Common.logger and MysqlHelper
			
 
				+        :param crawler: crawler name handed to Common.logger and MysqlHelper
			
 
				+        :param env: environment name handed to MysqlHelper.update_values
			
 
				+        """
			
 
				+        try:
			
 
				+            url = "https://tophub.today/n/K7GdaMgdQy"
			
 
				+            headers = {
			
 
				+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"
			
 
				+            }
			
 
				+            # A timeout keeps a stalled connection from blocking the caller forever.
			
 
				+            response = requests.get(url=url, headers=headers, proxies=proxies, timeout=30).text
			
 
				+            if len(response) == 0:
			
 
				+                Common.logger(log_type, crawler).error(f"douyin_hot_search:{response}\n")
			
 
				+                return
			
 
				+            response_html = etree.HTML(response)
			
 
				+            tr_list = response_html.xpath("//tr")
			
 
				+            for tr in tr_list:
			
 
				+                source = "抖音"
			
 
				+
			
 
				+                day_cells = tr.xpath("./td[1]/text()")
			
 
				+                title_cells = tr.xpath("./td[2]/*[1]/text()")
			
 
				+                # Skip rows missing either cell instead of letting an IndexError end the whole scrape.
			
 
				+                if not day_cells or not title_cells:
			
 
				+                    continue
			
 
				+                # A first cell shorter than 10 characters is treated as a row for today
			
 
				+                # (presumably it is not a full date - TODO confirm against cls.today's format).
			
 
				+                publish_time = cls.today if len(day_cells[0]) < 10 else day_cells[0]
			
 
				+                if publish_time != cls.today:
			
 
				+                    continue
			
 
				+
			
 
				+                # Escape backslashes and single quotes so the title cannot break the quoted SQL literal
			
 
				+                # (assumes MySQL's default sql_mode, where backslash is the escape character).
			
 
				+                title = title_cells[0].strip().replace("\\", "\\\\").replace("'", "\\'")
			
 
				+
			
 
				+                insert_sql = f"""insert into crawler_hot_title(source, title, publish_time)
			
 
				+                                values("{source}", '{title}', "{publish_time}")"""
			
 
				+                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
			
 
				+                MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
			
 
				+                Common.logger(log_type, crawler).info("写入数据库成功\n")
			
 
				+        except Exception as e:
			
 
				+            Common.logger(log_type, crawler).error(f"douyin_hot_search:{e}\n")
			
 
				+
			
 
				+    # Kuaishou hot search
			
 
				+    @classmethod
			
 
				+    def kuaishou_hot_search(cls, log_type, crawler, env):
			
 
				+        """Scrape today's Kuaishou hot-search titles from tophub.today into crawler_hot_title.
			
 
				+
			
 
				+        :param log_type: log channel name handed to Common.logger and MysqlHelper
			
 
				+        :param crawler: crawler name handed to Common.logger and MysqlHelper
			
 
				+        :param env: environment name handed to MysqlHelper.update_values
			
 
				+        """
			
 
				+        try:
			
 
				+            url = "https://tophub.today/n/MZd7PrPerO"
			
 
				+            headers = {
			
 
				+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"
			
 
				+            }
			
 
				+            # A timeout keeps a stalled connection from blocking the caller forever.
			
 
				+            response = requests.get(url=url, headers=headers, proxies=proxies, timeout=30).text
			
 
				+            if len(response) == 0:
			
 
				+                Common.logger(log_type, crawler).error(f"kuaishou_hot_search:{response}\n")
			
 
				+                return
			
 
				+            response_html = etree.HTML(response)
			
 
				+            tr_list = response_html.xpath("//tr")
			
 
				+            for tr in tr_list:
			
 
				+                source = "快手"
			
 
				+
			
 
				+                day_cells = tr.xpath("./td[1]/text()")
			
 
				+                title_cells = tr.xpath("./td[2]/*[1]/text()")
			
 
				+                # Skip rows missing either cell instead of letting an IndexError end the whole scrape.
			
 
				+                if not day_cells or not title_cells:
			
 
				+                    continue
			
 
				+                # A first cell shorter than 10 characters is treated as a row for today
			
 
				+                # (presumably it is not a full date - TODO confirm against cls.today's format).
			
 
				+                publish_time = cls.today if len(day_cells[0]) < 10 else day_cells[0]
			
 
				+                if publish_time != cls.today:
			
 
				+                    continue
			
 
				+
			
 
				+                # Escape backslashes and single quotes so the title cannot break the quoted SQL literal
			
 
				+                # (assumes MySQL's default sql_mode, where backslash is the escape character).
			
 
				+                title = title_cells[0].strip().replace("\\", "\\\\").replace("'", "\\'")
			
 
				+
			
 
				+                insert_sql = f"""insert into crawler_hot_title(source, title, publish_time)
			
 
				+                                values("{source}", '{title}', "{publish_time}")"""
			
 
				+                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
			
 
				+                MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
			
 
				+                Common.logger(log_type, crawler).info("写入数据库成功\n")
			
 
				+        except Exception as e:
			
 
				+            Common.logger(log_type, crawler).error(f"kuaishou_hot_search:{e}\n")
			
 
				+
			
 
				+    # Weibo hot search
			
 
				+    @classmethod
			
 
				+    def weibo_hot_search(cls, log_type, crawler, env):
			
 
				+        """Scrape today's Weibo hot-search titles from tophub.today into crawler_hot_title.
			
 
				+
			
 
				+        :param log_type: log channel name handed to Common.logger and MysqlHelper
			
 
				+        :param crawler: crawler name handed to Common.logger and MysqlHelper
			
 
				+        :param env: environment name handed to MysqlHelper.update_values
			
 
				+        """
			
 
				+        try:
			
 
				+            url = "https://tophub.today/n/KqndgxeLl9"
			
 
				+            headers = {
			
 
				+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"
			
 
				+            }
			
 
				+            # A timeout keeps a stalled connection from blocking the caller forever.
			
 
				+            response = requests.get(url=url, headers=headers, proxies=proxies, timeout=30).text
			
 
				+            if len(response) == 0:
			
 
				+                Common.logger(log_type, crawler).error(f"weibo_hot_search:{response}\n")
			
 
				+                return
			
 
				+            response_html = etree.HTML(response)
			
 
				+            tr_list = response_html.xpath("//tr")
			
 
				+            for tr in tr_list:
			
 
				+                source = "微博"
			
 
				+
			
 
				+                day_cells = tr.xpath("./td[1]/text()")
			
 
				+                title_cells = tr.xpath("./td[2]/*[1]/text()")
			
 
				+                # Skip rows missing either cell instead of letting an IndexError end the whole scrape.
			
 
				+                if not day_cells or not title_cells:
			
 
				+                    continue
			
 
				+                # A first cell shorter than 10 characters is treated as a row for today
			
 
				+                # (presumably it is not a full date - TODO confirm against cls.today's format).
			
 
				+                publish_time = cls.today if len(day_cells[0]) < 10 else day_cells[0]
			
 
				+                if publish_time != cls.today:
			
 
				+                    continue
			
 
				+
			
 
				+                # Escape backslashes and single quotes so the title cannot break the quoted SQL literal
			
 
				+                # (assumes MySQL's default sql_mode, where backslash is the escape character).
			
 
				+                title = title_cells[0].strip().replace("\\", "\\\\").replace("'", "\\'")
			
 
				+
			
 
				+                insert_sql = f"""insert into crawler_hot_title(source, title, publish_time)
			
 
				+                                values("{source}", '{title}', "{publish_time}")"""
			
 
				+                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
			
 
				+                MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
			
 
				+                Common.logger(log_type, crawler).info("写入数据库成功\n")
			
 
				+        except Exception as e:
			
 
				+            Common.logger(log_type, crawler).error(f"weibo_hot_search:{e}\n")
			
 
				+
			
 
				+    # WeChat (Weixin) hot search
			
 
				+    @classmethod
			
 
				+    def weixin_hot_search(cls, log_type, crawler, env):
			
 
				+        """Scrape today's WeChat hot-search titles from tophub.today into crawler_hot_title.
			
 
				+
			
 
				+        :param log_type: log channel name handed to Common.logger and MysqlHelper
			
 
				+        :param crawler: crawler name handed to Common.logger and MysqlHelper
			
 
				+        :param env: environment name handed to MysqlHelper.update_values
			
 
				+        """
			
 
				+        try:
			
 
				+            url = "https://tophub.today/n/W1VdJPZoLQ"
			
 
				+            headers = {
			
 
				+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54"
			
 
				+            }
			
 
				+            # A timeout keeps a stalled connection from blocking the caller forever.
			
 
				+            response = requests.get(url=url, headers=headers, proxies=proxies, timeout=30).text
			
 
				+            if len(response) == 0:
			
 
				+                Common.logger(log_type, crawler).error(f"weixin_hot_search:{response}\n")
			
 
				+                return
			
 
				+            response_html = etree.HTML(response)
			
 
				+            tr_list = response_html.xpath("//tr")
			
 
				+            for tr in tr_list:
			
 
				+                source = "微信"
			
 
				+
			
 
				+                day_cells = tr.xpath("./td[1]/text()")
			
 
				+                title_cells = tr.xpath("./td[2]/*[1]/text()")
			
 
				+                # Skip rows missing either cell instead of letting an IndexError end the whole scrape.
			
 
				+                if not day_cells or not title_cells:
			
 
				+                    continue
			
 
				+                # A first cell shorter than 10 characters is treated as a row for today
			
 
				+                # (presumably it is not a full date - TODO confirm against cls.today's format).
			
 
				+                publish_time = cls.today if len(day_cells[0]) < 10 else day_cells[0]
			
 
				+                if publish_time != cls.today:
			
 
				+                    continue
			
 
				+
			
 
				+                # Escape backslashes and single quotes so the title cannot break the quoted SQL literal
			
 
				+                # (assumes MySQL's default sql_mode, where backslash is the escape character).
			
 
				+                title = title_cells[0].strip().replace("\\", "\\\\").replace("'", "\\'")
			
 
				+
			
 
				+                insert_sql = f"""insert into crawler_hot_title(source, title, publish_time)
			
 
				+                                values("{source}", '{title}', "{publish_time}")"""
			
 
				+                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
			
 
				+                MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
			
 
				+                Common.logger(log_type, crawler).info("写入数据库成功\n")
			
 
				+        except Exception as e:
			
 
				+            Common.logger(log_type, crawler).error(f"weixin_hot_search:{e}\n")
			
 
				+
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				-    HotSearch.baidu_hot_search("hot-search", "weixinzhishu", "dec")
			
 
				+    # Manual debug entry point: only the WeChat scraper runs; the other calls are left commented out.
			
 
				+    # HotSearch.baidu_hot_search("hot-search-baidu", "weixinzhishu", "dev")
			
 
				+    # HotSearch.douyin_hot_search("hot-search-douyin", "weixinzhishu", "dev")
			
 
				+    # HotSearch.kuaishou_hot_search("hot-search-kuaishou", "weixinzhishu", "dev")
			
 
				+    # HotSearch.weibo_hot_search("hot-search-weibo", "weixinzhishu", "dev")
			
 
				+    HotSearch.weixin_hot_search("hot-search-weixin", "weixinzhishu", "dev")
			
 
				+
			
--- a/weixinzhishu/weixinzhishu_main/run_weixinzhishu_hot_search.py
+++ b/weixinzhishu/weixinzhishu_main/run_weixinzhishu_hot_search.py
@@ -1,18 +1,62 @@
 
				 # -*- coding: utf-8 -*-
			
 
				 # @Author: wangkun
			
 
				 # @Time: 2023/3/31
			
 
				-import datetime
			
 
				 import os
			
 
				 import sys
			
 
				+from threading import Thread
			
 
				 sys.path.append(os.getcwd())
			
 
				+from common.common import Common
			
 
				 from weixinzhishu.weixinzhishu_hot_search.weixinzhishu_hot_search import HotSearch
			
 
				 
			
 
				 
			
 
				 class Main:
			
 
				     @classmethod
			
 
				-    def main(cls):
			
 
				-        HotSearch.baidu_hot_search()
			
 
				+    def thread_baidu(cls, log_type, crawler, env):
			
 
				+        """Log the start of the Baidu hot-search crawl, then run it with the same arguments."""
			
 
				+        start_message = '开始抓取"百度热搜榜"'
			
 
				+        Common.logger(log_type, crawler).info(start_message)
			
 
				+        HotSearch.baidu_hot_search(log_type, crawler, env)
			
 
				 
			
 
				+    @classmethod
			
 
				+    def thread_kuaishou(cls, log_type, crawler, env):
			
 
				+        """Log the start of the Kuaishou hot-search crawl, then run it with the same arguments."""
			
 
				+        start_message = '开始抓取"快手热搜榜"'
			
 
				+        Common.logger(log_type, crawler).info(start_message)
			
 
				+        HotSearch.kuaishou_hot_search(log_type, crawler, env)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def thread_douyin(cls, log_type, crawler, env):
			
 
				+        """Log the start of the Douyin hot-search crawl, then run it with the same arguments."""
			
 
				+        start_message = '开始抓取"抖音热搜榜"'
			
 
				+        Common.logger(log_type, crawler).info(start_message)
			
 
				+        HotSearch.douyin_hot_search(log_type, crawler, env)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def thread_weixin(cls, log_type, crawler, env):
			
 
				+        """Log the start of the WeChat hot-search crawl, then run it with the same arguments."""
			
 
				+        start_message = '开始抓取"微信热搜榜"'
			
 
				+        Common.logger(log_type, crawler).info(start_message)
			
 
				+        HotSearch.weixin_hot_search(log_type, crawler, env)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def thread_weibo(cls, log_type, crawler, env):
			
 
				+        """Log the start of the Weibo hot-search crawl, then run it with the same arguments."""
			
 
				+        start_message = '开始抓取"微博热搜榜"'
			
 
				+        Common.logger(log_type, crawler).info(start_message)
			
 
				+        HotSearch.weibo_hot_search(log_type, crawler, env)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def thread_main(cls, log_type, crawler, env):
			
 
				+        """Run the five hot-search workers in parallel threads and block until all have finished.
			
 
				+
			
 
				+        :param log_type: log channel used for the start/finish messages of this method
			
 
				+        :param crawler: crawler name forwarded to every worker
			
 
				+        :param env: environment name forwarded to every worker
			
 
				+        """
			
 
				+        Common.logger(log_type, crawler).info("开始抓取今日热搜榜\n")
			
 
				+        # (worker, log channel) pairs, listed in the order they are started and joined.
			
 
				+        jobs = (
			
 
				+            (cls.thread_baidu, "hot-search-baidu"),
			
 
				+            (cls.thread_kuaishou, "hot-search-kuaishou"),
			
 
				+            (cls.thread_douyin, "hot-search-douyin"),
			
 
				+            (cls.thread_weixin, "hot-search-weixin"),
			
 
				+            (cls.thread_weibo, "hot-search-weibo"),
			
 
				+        )
			
 
				+        workers = [Thread(target=job, args=(channel, crawler, env)) for job, channel in jobs]
			
 
				+
			
 
				+        # Start every thread before joining any of them so the crawls overlap.
			
 
				+        for worker in workers:
			
 
				+            worker.start()
			
 
				+        for worker in workers:
			
 
				+            worker.join()
			
 
				+        Common.logger(log_type, crawler).info("今日热搜榜全部抓取完毕\n")
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				-    Main.main()
			
 
				+    # Script entry point: run every hot-search worker with env set to "dev".
			
 
				+    Main.thread_main("hot-search", "weixinzhishu", "dev")
			
 
				+    pass