zhangliang před 2 měsíci
rodič
revize
baecd6cb6c
1 změnil soubory, kde provedl 154 přidání a 0 odebrání
  1. 154 0
      spider/crawler_offline/zhongqingkandian.py

+ 154 - 0
spider/crawler_offline/zhongqingkandian.py

@@ -0,0 +1,154 @@
+# -*- coding: utf-8 -*-
+import json
+import time
+import requests
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.log import Local
+
+class ZhongQingKanDian:
+    """Offline crawler for the ZhongQingKanDian recommend feed.
+
+    One crawl round fetches the recommend feed from the crawler API, requests
+    the detail of every feed item and of the items related to it, and hands
+    each video detail to ``FeishuUtils`` as one row of values.
+    """
+
+    API_BASE_URL = "http://8.217.192.46:8889"
+    COMMON_HEADERS = {
+        "Content-Type": "application/json"
+    }
+    MAX_RETRIES = 3
+    TIMEOUT = 10  # per-request timeout, in seconds
+    RETRY_DELAY = 2  # pause between two attempts of the same request, in seconds
+    DETAIL_DELAY = 2  # pause before each feed item's detail request, in seconds
+    RELATED_DELAY = 10  # pause before each related-list request, in seconds
+    IDLE_DELAY = 5  # back-off when the recommend feed yields nothing, in seconds
+    FEISHU_WRITE_DELAY = 0.5  # pause between the two Feishu calls, in seconds
+    # Arguments forwarded to FeishuUtils; presumably the spreadsheet token and
+    # the sheet id -- confirm against FeishuUtils.
+    FEISHU_SPREADSHEET_TOKEN = "BvScsJKDWhuj1ctUX1mcBzq1nYb"
+    FEISHU_SHEET_ID = "a338b3"
+
+    def __init__(self):
+        self.session = requests.Session()
+        self.session.headers.update(self.COMMON_HEADERS)
+        # Counters of successful requests, one per endpoint.
+        self.recommend_list_request_count = 0
+        self.content_recommend_list_request_count = 0
+        self.detail_request_count = 0
+
+    def _log(self, message):
+        """Write *message* to the crawler log at info level."""
+        Local.logger("zhongqingkandian", "recommend").info(message)
+
+    def send_request(self, endpoint, data):
+        """POST *data* to ``API_BASE_URL + endpoint`` and return the decoded JSON.
+
+        The request is attempted up to ``MAX_RETRIES`` times with
+        ``RETRY_DELAY`` seconds between attempts.
+
+        Args:
+            endpoint: Path appended to ``API_BASE_URL``.
+            data: Request body, already serialised by the caller.
+
+        Returns:
+            The decoded JSON body, or ``None`` when every attempt failed.
+        """
+        full_url = f"{self.API_BASE_URL}{endpoint}"
+        for retry in range(self.MAX_RETRIES):
+            try:
+                response = self.session.post(full_url, data=data, timeout=self.TIMEOUT)
+                response.raise_for_status()
+                return response.json()
+            except requests.RequestException as e:
+                self._log(f"请求 {full_url} 失败(第 {retry + 1} 次重试): {e}")
+            except ValueError as e:
+                # json.JSONDecodeError subclasses ValueError; catching the base
+                # class also covers decode errors that are plain ValueErrors.
+                self._log(f"解析 {full_url} 的响应数据失败(第 {retry + 1} 次重试): {e}")
+            # No point in sleeping after the final attempt.
+            if retry < self.MAX_RETRIES - 1:
+                time.sleep(self.RETRY_DELAY)
+        return None
+
+    def is_response_valid(self, resp):
+        """Return True when *resp* has ``code == 0`` and a non-null ``data.data``.
+
+        Anything that is not shaped like ``{"code": 0, "data": {"data": ...}}``
+        (``None``, a non-dict root, a null ``data`` member) is reported as
+        invalid instead of raising.
+        """
+        if not isinstance(resp, dict) or resp.get("code", -1) != 0:
+            return False
+        payload = resp.get("data")
+        return isinstance(payload, dict) and payload.get("data") is not None
+
+    def req_recommend_list(self):
+        """Request the recommend feed.
+
+        Returns:
+            The ``data.data`` member of the response, or ``None`` when the
+            request failed or the response was invalid.
+        """
+        url = '/crawler/zhong_qing_kan_dian/recommend'
+        body = json.dumps({"cursor": ""})
+        resp = self.send_request(url, body)
+        if self.is_response_valid(resp):
+            self.recommend_list_request_count += 1
+            self._log(f"请求推荐流的总次数: {self.recommend_list_request_count}响应:{resp}")
+            return resp["data"]["data"]
+        self._log(f"请求推荐流失败,返回异常: {resp}")
+        return None
+
+    def req_content_recommend_list(self, content_id):
+        """Request the list of items related to *content_id*.
+
+        Args:
+            content_id: Identifier of the item; sent to the API as a string.
+
+        Returns:
+            The ``data.data`` member of the response, or ``None`` when the
+            request failed or the response was invalid.
+        """
+        url = '/crawler/zhong_qing_kan_dian/related'
+        body = json.dumps({
+            "content_id": str(content_id),
+            "cursor": ""
+        })
+        resp = self.send_request(url, body)
+        if self.is_response_valid(resp):
+            self.content_recommend_list_request_count += 1
+            self._log(f"请求内容相关推荐流的总次数: {self.content_recommend_list_request_count}响应:{resp}")
+            return resp["data"]["data"]
+        self._log(f"请求内容相关推荐流失败,返回异常: {resp}")
+        return None
+
+    @staticmethod
+    def _build_detail_row(data, content_link, label):
+        """Flatten a video detail payload into one row of sheet values.
+
+        Returns:
+            ``[title, video_url, video_duration, cover, video_id, content_link,
+            account_name, account_id, account_avatar, label]``, or ``None``
+            when a required field is missing or a url list is empty.
+        """
+        try:
+            video = data["video_url_list"][0]
+            return [
+                data["title"],
+                video["video_url"],
+                video["video_duration"],
+                data["image_url_list"][0]["image_url"],
+                data["channel_content_id"],
+                content_link,
+                data["channel_account_name"],
+                data["channel_account_id"],
+                data["avatar"],
+                label,
+            ]
+        except (KeyError, IndexError, TypeError):
+            return None
+
+    def req_detail(self, content_link, label):
+        """Request the detail of one item and store it when it is a video.
+
+        Args:
+            content_link: Share link of the item, sent to the detail endpoint.
+            label: Free-form tag stored in the last column of the row.
+
+        Returns:
+            The row of values handed to ``FeishuUtils`` when the item is a
+            complete video, otherwise ``None`` (request failed, invalid
+            response, not a video, or required fields missing).
+        """
+        url = '/crawler/zhong_qing_kan_dian/detail'
+        body = json.dumps({
+            "content_link": content_link
+        })
+        resp = self.send_request(url, body)
+        # Validate the envelope the same way the list endpoints do, so that a
+        # null or missing payload is logged instead of raising.
+        if not self.is_response_valid(resp):
+            self._log(f"请求详情失败,返回异常: {resp}")
+            return None
+        self.detail_request_count += 1
+        self._log(f"请求详情的总次数: {self.detail_request_count}")
+        data = resp["data"]["data"]
+        if not isinstance(data, dict) or data.get("content_type") != "video":
+            self._log("不是视频")
+            return None
+        row = self._build_detail_row(data, content_link, label)
+        if row is None:
+            # Skip an incomplete payload instead of aborting the crawl loop.
+            self._log(f"视频详情字段缺失,跳过: {content_link}")
+            return None
+        # Two-step write: the first call presumably opens an empty row below
+        # the header, the second fills range A2:Z2 -- confirm with FeishuUtils.
+        FeishuUtils.insert_columns(self.FEISHU_SPREADSHEET_TOKEN, self.FEISHU_SHEET_ID, "ROWS", 1, 2)
+        time.sleep(self.FEISHU_WRITE_DELAY)
+        FeishuUtils.update_values(self.FEISHU_SPREADSHEET_TOKEN, self.FEISHU_SHEET_ID, "A2:Z2", [row])
+        return row
+
+    def control_request(self):
+        """Run one crawl round over the recommend feed and its related items.
+
+        When the feed request yields nothing the method sleeps ``IDLE_DELAY``
+        seconds before returning, so that ``run`` does not spin on a failing
+        or empty feed.
+        """
+        recommend_list = self.req_recommend_list()
+        if not recommend_list:
+            time.sleep(self.IDLE_DELAY)
+            return
+        for video_obj in recommend_list:
+            if not isinstance(video_obj, dict):
+                continue
+            content_link = video_obj.get("share_url")
+            content_id = video_obj.get("id")
+            if not (content_link and content_id):
+                continue
+            time.sleep(self.DETAIL_DELAY)
+            detail = self.req_detail(content_link, "推荐")
+            if detail:
+                print(detail)
+            time.sleep(self.RELATED_DELAY)
+            for content_obj in self.req_content_recommend_list(content_id) or []:
+                if not isinstance(content_obj, dict):
+                    continue
+                # "share_info" may be present but null.
+                related_link = (content_obj.get("share_info") or {}).get("share_url")
+                if related_link:
+                    res = self.req_detail(related_link, "内容相关推荐")
+                    if res:
+                        print(res)
+
+    def run(self):
+        """Repeat crawl rounds forever."""
+        while True:
+            self.control_request()
+
+
+
+
+if __name__ == '__main__':
+    # Script entry point: build the crawler and start its endless crawl loop.
+    crawler = ZhongQingKanDian()
+    crawler.run()
+    # Manual check of a single detail request (kept disabled):
+    # ZhongQingKanDian().req_detail('https://vol.youth.cn/1qWiCPOjl1CUewP5?signature=bDjmABzyXE32GNxlOY4pJVbdZfDqw9naZ9vnQ58wq06peMdkrP','ceshi')