| 
					
				 | 
			
			
				@@ -1,54 +1,46 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import asyncio 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import os 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import random 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import sys 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import uuid 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import asyncio 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import json 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import random 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import uuid 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import traceback 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from datetime import datetime 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import aiohttp 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import requests 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+sys.path.append(os.getcwd()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from application.common.feishu import FsData 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from application.common.feishu.feishu_utils import FeishuUtils 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from application.common.gpt import GPT4oMini 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from application.common.redis.redis_helper import SyncRedisHelper 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-sys.path.append(os.getcwd()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from application.items import VideoItem 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from application.pipeline import PiaoQuanPipeline 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from application.common.messageQueue import MQ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from application.common.log import AliyunLogger 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from application.common.mysql import MysqlHelper 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from application.functions.zqkd_db_redis import DatabaseOperations, RedisOperations 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from application.items import VideoItem 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from application.pipeline import PiaoQuanPipeline 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from application.common.log import Local 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 class ZhongQingKanDian: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # / recommend(列表11个id) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # ↓ 并发请求每个id的 / related(得到列表N个元素) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # ↓ 对每个元素并发请求 / detail 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # ↓ 若为视频,写入Redis(键:detail_id,值:视频数据) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     API_BASE_URL = "http://8.217.192.46:8889" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     COMMON_HEADERS = { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         "Content-Type": "application/json" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # 最大重试次数 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     MAX_RETRIES = 3 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    TIMEOUT = 30  # 设置超时时间 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    max_recommend_count = 100  # 推荐抓取每日最大量 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    max_related_recommend_count = 200  # 相关推荐抓取每日最大量 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    max_author_video = 300  # 账号每日抓取视频最大量 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        中青看点推荐流 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        Topic:zqkd_recommend_prod 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # 最大等待时长 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    TIMEOUT = 30 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     def __init__(self, platform, mode, rule_dict, user_list, env="prod"): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        self.limit_flag = False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        初始化 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param platform: 平台名称 zhongqingkandian 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param mode: 运行模式  recommend 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param rule_dict: 规则字典,包含视频数量限制、时长限制等规则 [{"videos_cnt":{"min":100,"max":0}},{"duration":{"min":30,"max":1200}}] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param user_list: 用户列表 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param env: 运行环境,默认为 "prod" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.limit_flag = True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.platform = platform 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.mode = mode 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.rule_dict = rule_dict 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -58,287 +50,303 @@ class ZhongQingKanDian: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.mq = MQ(topic_name="topic_crawler_etl_" + self.env) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.expire_flag = False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.aliyun_log = AliyunLogger(mode=self.mode, platform=self.platform) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        self.mysql = MysqlHelper(mode=self.mode, platform=self) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.db_ops = DatabaseOperations(mode=mode, platform=platform) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.redis_ops = RedisOperations() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         data_rule = FsData() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.title_rule = data_rule.get_title_rule() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.LocalLog = Local.logger(self.platform, self.mode) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     async def send_request(self, path, data): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        异步发送 POST 请求到指定路径,带有重试机制。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param path: 请求的 API 路径 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param data: 请求的数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :return: 响应的 JSON 数据,如果请求失败则返回 None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         full_url = f"{self.API_BASE_URL}{path}" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         async with aiohttp.ClientSession(headers=self.COMMON_HEADERS) as session: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             for retry in range(self.MAX_RETRIES): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     async with session.post(full_url, data=data, timeout=self.TIMEOUT) as response: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         response.raise_for_status() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        self.LocalLog.info(f"{path}响应数据:{await response.json()}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         return await response.json() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                except aiohttp.ClientError as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    if retry < self.MAX_RETRIES - 1: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        await asyncio.sleep(2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                except json.JSONDecodeError as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    if retry < self.MAX_RETRIES - 1: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        await asyncio.sleep(2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                except (aiohttp.ClientError, json.JSONDecodeError) as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    tb_info = traceback.format_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    self.LocalLog.info(f"{path}请求失败:{e}  \n{tb_info}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    self.aliyun_log.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        code="3000", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        message=f"请求 {path} 失败,错误信息: {str(e)}", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        data={"path": path} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    await asyncio.sleep(random.randint(5, 10)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    def is_response_valid(self, resp): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if resp['code'] != 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def is_response_valid(self, resp, url): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        检查响应是否有效(状态码为 0 表示有效)。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param resp: 响应数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param url: 请求的 URL 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :return: 如果响应有效则返回响应数据,否则返回 None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if resp and resp.get('code') != 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.aliyun_log.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    code="3000", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    message=f"抓取{url}失败,请求失败,响应:{resp}" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.LocalLog.info(f"{url}请求失败,响应:{resp}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                return None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return resp 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            tb_info = traceback.format_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             self.aliyun_log.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 code="3000", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                message="抓取单条视频失败,请求失败" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            ), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        return resp 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                message=f"检查响应有效性时出错,错误信息: {str(e)}", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data={"url": url, "resp": resp} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            self.LocalLog.info(f"检查 {url} 响应有效性时出错:{e} \n{tb_info}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     async def req_recommend_list(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print("开始请求推荐") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        ''' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        推荐请求 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        ''' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        url = '/crawler/zhong_qing_kan_dian/recommend' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        body = json.dumps({"cursor": ""}) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        resp = await self.send_request(url, body) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        return self.is_response_valid(resp) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    async def req_related_recommend_list(self, content_id): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print("请求相关推荐") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        ''' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-         相关推荐请求 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        ''' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        url = '/crawler/zhong_qing_kan_dian/related' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        body = json.dumps({ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "content_id": str(content_id), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "cursor": "" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        resp = await self.send_request(url, body) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        return self.is_response_valid(resp) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        异步请求推荐视频列表。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :return: 推荐视频列表的有效响应数据,如果请求失败则返回 None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    async def req_detail(self, content_link, label,**kwargs): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print("请求详情") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        ''' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        请求详情 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        ''' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        url = '/crawler/zhong_qing_kan_dian/detail' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        body = json.dumps({ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "content_link": content_link 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        resp = await self.send_request(url, body) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if not self.is_response_valid(resp): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        data = resp.get("data", {}).get("data", {}) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if data.get("content_type") != "video": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            url = '/crawler/zhong_qing_kan_dian/recommend' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            body = json.dumps({"cursor": ""}) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            self.LocalLog.info(f"开始请求推荐{body}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            resp = await self.send_request(url, body) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return self.is_response_valid(resp, url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            tb_info = traceback.format_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             self.aliyun_log.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                code="3003", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                message=f"跳过非视频内容(label={label})", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                data={"content_link": content_link} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                code="1003", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                message=f"请求推荐视频列表时发生异常,错误信息: {str(e)}\n{tb_info}", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data={"url": url} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print("是视频") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 将 kwargs 中的键值对更新到 data 字典中 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        data.update(kwargs) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        self.process_video_obj(data, label) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        await asyncio.sleep(10) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    async def control_request(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print("开始处理") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        """核心控制逻辑:顺序处理三个接口""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        recommend_resp = await self.req_recommend_list() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if not self.is_response_valid(recommend_resp): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        recommend_list = recommend_resp.get("data", {}).get("data", []) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            self.LocalLog.info(f"请求推荐视频列表 {url} 时发生异常:{str(e)}   \n{tb_info}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for video_obj in recommend_list: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            content_link = video_obj.get("share_url") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            content_id = video_obj.get("id") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if not (content_link and content_id): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # 处理推荐视频详情 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            await self.req_detail(content_link, "recommend",**video_obj) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # # 处理相关推荐列表(间隔后执行) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # await asyncio.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # related_resp = await self.req_related_recommend_list(content_id) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # if not self.is_response_valid(related_resp): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            #     continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # related_list = related_resp.get("data", {}).get("data", []) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # for related_obj in related_list: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            #     related_content_link = related_obj.get("share_url") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            #     if related_content_link: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            #         await self.req_detail(related_content_link, "related",**related_obj) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    def process_video_obj(self, video_obj, label): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    async def req_detail(self, content_link, **kwargs): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        处理视频 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        :param video_obj: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        异步请求视频详情。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param content_link: 视频内容链接 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param label: 视频标签(如 "recommend" 或 "related") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param kwargs: 额外的视频信息 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :return: 无返回值,处理视频详情信息 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            self.LocalLog.info(f"开始请求视频详情,链接: {content_link}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            url = '/crawler/zhong_qing_kan_dian/detail' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            body = json.dumps({ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "content_link": content_link 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            resp = await self.send_request(url, body) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if not self.is_response_valid(resp, url): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            data = resp.get("data", {}).get("data", {}) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if data.get("content_type") != "video": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.aliyun_log.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    code="3003", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    message=f"跳过非视频内容)", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    data={"content_link": content_link} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.LocalLog.info(f"跳过非视频内容,链接: {content_link}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            self.LocalLog.info(f"{content_link} 是视频") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            data.update(kwargs) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            await self.process_video_obj(data) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            await asyncio.sleep(10) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            tb_info = traceback.format_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            self.aliyun_log.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                code="1005", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                message=f"请求视频详情时发生异常,错误信息: {str(e)}", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data={"content_link": content_link} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            self.LocalLog.error(f"请求视频详情,链接 {content_link} 时发生异常:{e}  \n{tb_info}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if not self.save_video_id(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        our_user = random.choice(self.user_list) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        trace_id = self.platform + str(uuid.uuid1()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item = VideoItem() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    async def control_request_recommend(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        控制推荐视频列表的请求和处理流程。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :return: 无返回值,根据下载数量限制控制流程 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        while self.limit_flag: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.LocalLog.info(f"开始推荐视频列表的请求和处理流程,今日已爬推荐 {self.download_cnt} 个视频") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                recommend_resp = await self.req_recommend_list() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if not recommend_resp: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                recommend_list = recommend_resp.get("data", {}).get("data", []) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.LocalLog.info(f"获取的推荐列表长度:{len(recommend_list)}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                for video_obj in recommend_list: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    content_link = video_obj.get("share_url") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    content_id = video_obj.get("id") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    self.LocalLog.info(f"content_link == {content_link} \n content_id == {content_id}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if not (content_link and content_id): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    # 当前内容id保存到redis 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    self.redis_ops.save_recommend_video(content_id) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    await self.req_detail(content_link, **video_obj) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                tb_info = traceback.format_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.aliyun_log.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    code="3008", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    message=f"控制推荐视频请求和处理时发生异常,错误信息: {str(e)}", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    data={} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.LocalLog.info(f"控制推荐视频请求和处理时发生异常:\n{tb_info}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.LocalLog.info(f"循环结束,当前 limit_flag 值为: {self.limit_flag}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    async def process_video_obj(self, video_obj): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        处理视频对象,包括检查视频时长、用户信息、保存数据等操作。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param video_obj: 视频对象,包含视频的各种信息 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :return: 无返回值,完成视频对象的处理 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            video_duration = video_obj["video_url_list"][0]['video_duration'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             video_id = video_obj['channel_content_id'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # 检查视频ID是否存在 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if self.redis_ops.check_video_id_exists(video_id): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.aliyun_log.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    code="3004", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    message=f"重复视频ID:{video_id}" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.LocalLog.info(f"重复视频ID: {video_id}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            our_user = random.choice(self.user_list) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            trace_id = self.platform + str(uuid.uuid1()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item = VideoItem() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             account_id = video_obj["channel_account_id"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             account_name = video_obj["channel_account_name"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             account_avatar = video_obj["avatar"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            is_repeat_user = self.select_id(account_id) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # 判断用户是否重复 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # 检查用户ID是否存在 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            is_repeat_user = self.db_ops.check_user_id(account_id) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if is_repeat_user: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                self.update_name_url(account_id, account_name, account_avatar) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # 更新用户信息,使用异步方法并等待结果 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.LocalLog.info(f"用户{account_id}已经存在数据库中") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.db_ops.update_user(account_id, account_name, account_avatar) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                # 写表 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                self.insert_name_url(account_id, account_name, account_avatar) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                # 写redis 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                self.write_redis_user_data(json.dumps({"uid": account_id})) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                print("写入成功") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            print(f"写入异常{e}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            pass 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        url = video_obj["video_url_list"][0]['video_url'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        duration = video_obj["video_url_list"][0]['video_duration'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("video_id", video_obj['channel_content_id']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("video_title", video_obj["title"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("play_cnt", int(video_obj["read_num"])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("publish_time_stamp", int(int(video_obj["publish_timestamp"])/1000)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("out_user_id", video_obj["channel_account_id"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("cover_url", video_obj["image_url_list"][0]['image_url']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("like_cnt", 0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("collection_cnt", int(video_obj['collect_num'])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("share_cnt", int(video_obj["share_num"])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("comment_cnt", int(video_obj["cmt_num"])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("video_url", video_obj["video_url_list"][0]['video_url']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("out_video_id", int(video_obj["channel_content_id"])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("duration", video_obj["video_url_list"][0]['video_duration']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("platform", self.platform) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("strategy", self.mode) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("session", "{}-{}".format(self.platform, int(time.time()))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("user_id", our_user["uid"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        item.add_video_info("user_name", our_user["nick_name"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        mq_obj = item.produce_item() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        pipeline = PiaoQuanPipeline( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            platform=self.platform, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            mode=self.mode, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            rule_dict=self.rule_dict, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            env=self.env, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            item=mq_obj, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            trace_id=trace_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if pipeline.process_item(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            title_list = self.title_rule.split(",") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            title = video_obj["title"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            contains_keyword = any(keyword in title for keyword in title_list) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if contains_keyword: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                new_title = GPT4oMini.get_ai_mini_title(title) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if new_title: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    item.add_video_info("video_title", new_title) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    current_time = datetime.now() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    values = [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            video_obj["video_url_list"][0]['video_url'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            video_obj["image_url_list"][0]['image_url'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            title, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            new_title, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            formatted_time, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.LocalLog.info(f"用户{account_id}没在数据库中") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # 插入用户信息,使用异步方法并等待结果 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.db_ops.insert_user(account_id, account_name, account_avatar) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.aliyun_log.logging(code="1007", message=f"用户数据写入成功,用户ID:{account_id}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.LocalLog.info(f"用户数据写入成功,用户ID: {account_id}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if video_duration > self.rule_dict.get("duration", {}).get("max", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                                                       1200) or video_duration < self.rule_dict.get( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "duration", {}).get("min", 30): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.aliyun_log.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    code="3005", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    message=f"视频时长不满足条件[>=30s&<=1200s]视频ID:{video_obj['channel_content_id']},视频时长:{video_duration}" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.LocalLog.info( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    f"视频时长不满足条件,视频ID: {video_obj['channel_content_id']}, 视频时长: {video_duration}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("video_id", video_obj['channel_content_id']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("video_title", video_obj["title"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("play_cnt", int(video_obj["read_num"])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("publish_time_stamp", int(int(video_obj["publish_timestamp"]) / 1000)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("out_user_id", video_obj["channel_account_id"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("cover_url", video_obj["image_url_list"][0]['image_url']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("like_cnt", 0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("collection_cnt", int(video_obj['collect_num'])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("share_cnt", int(video_obj["share_num"])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("comment_cnt", int(video_obj["cmt_num"])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("video_url", video_obj["video_url_list"][0]['video_url']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("out_video_id", int(video_obj["channel_content_id"])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("duration", video_obj["video_url_list"][0]['video_duration']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("platform", self.platform) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("strategy", self.mode) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("session", f"{self.platform}-{int(time.time())}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("user_id", our_user["uid"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            item.add_video_info("user_name", our_user["nick_name"]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            mq_obj = item.produce_item() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            pipeline = PiaoQuanPipeline( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                platform=self.platform, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                mode=self.mode, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                rule_dict=self.rule_dict, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                env=self.env, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                item=mq_obj, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                trace_id=trace_id 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if pipeline.process_item(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                title_list = self.title_rule.split(",") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                title = video_obj["title"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                contains_keyword = any(keyword in title for keyword in title_list) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if contains_keyword: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    new_title = GPT4oMini.get_ai_mini_title(title) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if new_title: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        item.add_video_info("video_title", new_title) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        current_time = datetime.now() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        values = [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                video_obj["video_url_list"][0]['video_url'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                video_obj["image_url_list"][0]['image_url'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                title, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                new_title, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                formatted_time, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "v8S6nL", "ROWS", 1, 2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    time.sleep(0.5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "v8S6nL", "A2:Z2", values) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            self.download_cnt += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            self.mq.send_msg(mq_obj) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if self.download_cnt >= int( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    self.rule_dict.get("videos_cnt", {}).get("min", 200) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            ): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                self.limit_flag = True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if label == "recommend": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                key = f"crawler:zqkd:{video_id}" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                self.save_video_id(key) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      查询用户id是否存在 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    def select_id(self, uid): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        sql = f""" select uid from zqkd_uid where uid = "{uid}"; """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        db = MysqlHelper() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        repeat_user = db.select(sql=sql) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if repeat_user: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        return False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    def update_name_url(self, uid,user_name,avatar_url): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        sql = f""" update zqkd_uid set avatar_url = "{avatar_url}", user_name="{user_name}" where uid = "{uid}"; """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        db = MysqlHelper() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        repeat_video = db.update(sql=sql) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if repeat_video: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        return False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    def insert_name_url(self, uid, user_name, avatar_url): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        current_time = datetime.now() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        insert_sql = f"""INSERT INTO zqkd_uid (uid, avatar_url, user_name, data_time) values ('{uid}' ,'{avatar_url}','{user_name}', '{formatted_time}')""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        db = MysqlHelper() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        repeat_video = db.update(sql=insert_sql) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if repeat_video: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        return False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    def get_redis_video_data(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        """获取一条id""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        task = f"task:zqkd_video_id" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        helper = SyncRedisHelper() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        client = helper.get_client() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 获取列表的长度 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        list_length = client.llen(task) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 循环获取列表中的元素 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for i in range(list_length): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # 使用 lrange 获取单个元素 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            element = client.lrange(task, i, i) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if element: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                print(f"Element at index {i}: {element[0].decode('utf-8')}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                return element 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    def write_redis_user_data(self,key,ret): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        """写入""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        task = f"task:zqkd_user_id" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        helper = SyncRedisHelper() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        client = helper.get_client() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        client.rpush(task, ret) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "v8S6nL", "ROWS", 1, 2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        time.sleep(0.5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "v8S6nL", "A2:Z2", values) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.mq.send_msg(mq_obj) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.download_cnt += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.aliyun_log.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    code="2009", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    message=f"成功发送视频到etl", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    data={"video_obj": video_obj} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # 保存视频ID 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                self.redis_ops.save_video_id(video_obj['channel_content_id']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if self.download_cnt >= self.rule_dict.get("videos_cnt", {}).get("min", 100): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    self.limit_flag = False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            tb_info = traceback.format_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            self.aliyun_log.logging( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                code="1005", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                message=f"处理视频对象时发生异常,错误信息: {str(e)}", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data={"video_obj": video_obj} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            self.LocalLog.error(f"处理视频对象时发生异常: {e}\n{tb_info}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     async def run(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        while True: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            await self.control_request() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    def save_video_id(self,key): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        helper = SyncRedisHelper() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        client = helper.get_client() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 将视频ID存储到Redis中,并设置过期时间为7天 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 检查键是否存在 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if client.exists(key): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            expiration_time = int(timedelta(days=7).total_seconds()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            client.setex(key, expiration_time, "1") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        运行主流程,异步执行推荐视频和相关推荐视频的请求,直到达到下载数量限制。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :return: 无返回值,程序运行的主逻辑 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.LocalLog.info("开始执行中青看点推荐抓取...") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        await asyncio.gather( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            self.control_request_recommend() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from datetime import datetime, timedelta 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 if __name__ == '__main__': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    asyncio.run(ZhongQingKanDian( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        platform="zhongqingkandian", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        mode="recommend", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        rule_dict={"videos_cnt": {"min": 2, "max": 0}}, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        user_list=[{"uid": 81522822, "link": "中青看点推荐", "nick_name": "免不了俗"}] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    ).run()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # content_link = "https://vol.youth.cn/4X32ftEV6SsA9Mq9?signature=6y30XlmbkL9oxwAjJd1PXOBX0idx0ZD1gMQE2nZKW8RNpvPrqz" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # asyncio.run(ZhongQingKanDian( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     #     platform="zhongqingkandian", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     #     mode="recommend", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    #     rule_dict={}, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    #     user_list=[{"uid": 81522822, "link": "中青看点推荐", "nick_name": "免不了俗"}, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    #                ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # ).run()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    save_video_id("1234") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    #     rule_dict={ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    #         {"videos_cnt":{"min":100,"max":0}},{"duration":{"min":30,"max":1200}} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    #     }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    #     user_list=[{"uid": 81522822, "link": "中青看点推荐", "nick_name": "免不了俗"}] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # ).req_detail(content_link,"测试")) 
			 |