
Xiaoniangao accounts: change data source retrieval

zhangliang 1 month ago
parent commit 2c2d0d039d

+ 2 - 2
config/spiders_config.yaml

@@ -67,8 +67,8 @@ xiaoniangaoauthor:
      account_id: "{{uid}}" # uid from the database
   loop_times: 100
   loop_interval:
-    min: 30
-    max: 60
+    min: 5
+    max: 20
   feishu_sheetid: "golXy9"
   response_parse:
    uid: "$.uid" # uid from the database
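
Halving the polling gap is the whole point of this hunk. As a minimal sketch of how a loop_interval block like this one is typically consumed (the run loop and crawl_once step are assumptions, not code from this repo):

import asyncio
import random

async def run(spider, loop_times: int, interval: dict) -> None:
    # Sleep a random 5-20 s between loops (previously 30-60 s).
    for _ in range(loop_times):
        await spider.crawl_once()  # hypothetical per-loop crawl step
        await asyncio.sleep(random.uniform(interval["min"], interval["max"]))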

+ 1 - 1
services/async_mysql_service.py

@@ -161,7 +161,7 @@ class AsyncMysqlService:
         return result["cnt"] if result else 0
 
     async def get_xng_mid(self) -> int:
-        sql = """select DISTINCT(uid) from xng_uid ORDER BY `data_time` DESC;"""
+        sql = """select link from crawler_user_v3 where task_id=21;"""
         result = await self.fetch_all(sql)
         return result if result else 0
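
Assuming fetch_all() maps each row to a dict keyed by column name, callers of get_xng_mid now receive link values instead of uid values; note that the -> int annotation no longer describes the row list the method actually returns. An illustration with made-up values:

# Hypothetical result shape after this change (example values only):
rows = [{"link": "268712345"}, {"link": "268754321"}]
# The old query yielded [{"uid": ...}] rows; downstream code must now read
# row["link"], which is exactly what the xiaoniangao_author change at the
# bottom of this commit does.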
 

+ 15 - 2
services/pipeline.py

@@ -28,11 +28,12 @@ class PiaoQuanPipeline:
         self.mysql = AsyncMysqlService(platform=platform, mode=mode)
         self.logger = LoggerManager.get_logger(platform=platform, mode=mode)
         self.aliyun_log = LoggerManager.get_aliyun_logger(platform=platform, mode=mode)
+        self.feishu_spreadsheet_token = "KsoMsyP2ghleM9tzBfmcEEXBnXg"
 
     async def feishu_time_list(self):
         async with FeishuDataAsync() as feishu_data:
             summary = await feishu_data.get_values(
-                spreadsheet_token="KsoMsyP2ghleM9tzBfmcEEXBnXg",
+                spreadsheet_token=self.feishu_spreadsheet_token,
                 sheet_id="RuLK77"
             )
         for row in summary[1:]:
@@ -43,7 +44,7 @@ class PiaoQuanPipeline:
     async def feishu_list(self):
         async with FeishuDataAsync() as feishu_data:
             summary = await feishu_data.get_values(
-                spreadsheet_token="KsoMsyP2ghleM9tzBfmcEEXBnXg",
+                spreadsheet_token=self.feishu_spreadsheet_token,
                 sheet_id="letS93"
             )
         for row in summary[1:]:
@@ -51,6 +52,17 @@ class PiaoQuanPipeline:
                 return row[1]
         return None
 
+    async def title_restricted_words(self):
+        async with FeishuDataAsync() as feishu_data:
+            summary = await feishu_data.get_values(
+                spreadsheet_token=self.feishu_spreadsheet_token,
+                sheet_id="BS9uyu"
+            )
+        for row in summary[1:]:
+            if row[0] == self.platform:
+                return row[1]
+        return None
+
     async def publish_time_flag(self) -> bool:
         publish_ts = self.item.get("publish_time_stamp", int(time.time()))
         update_ts = self.item.get("update_time_stamp", int(time.time()))
@@ -145,6 +157,7 @@ class PiaoQuanPipeline:
         """
          Basic video download rules
           :return:
+          "rule": "[{\"period\":{\"min\":15,\"max\":3}},{\"duration\":{\"min\":50,\"max\":0}},{\"share_cnt\":{\"min\":2,\"max\":0}},{\"videos_cnt\":{\"min\":300,\"max\":0}}]",
         """
         for key in self.item:
             if self.rule_dict.get(key):
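
Two hedged sketches, not code from this commit: title_restricted_words mirrors feishu_list, returning the second cell of the row whose first cell matches the platform; and the sample "rule" value in the docstring is a JSON array of single-key objects, which the rule_dict lookups below presumably consume after flattening.

import json

async def title_blocked(pipeline) -> bool:
    # Hypothetical helper. Assumes the "BS9uyu" sheet stores restricted
    # words as a comma-separated cell and that items carry a "video_title"
    # key (both assumptions).
    cell = await pipeline.title_restricted_words()  # e.g. "word1,word2"
    restricted = [w for w in (cell.split(",") if cell else []) if w]
    title = pipeline.item.get("video_title", "")
    return any(w in title for w in restricted)

# Flattening the docstring's sample rule string into the one-key-per-rule
# shape that self.rule_dict.get(key) implies:
rule = ('[{"period":{"min":15,"max":3}},{"duration":{"min":50,"max":0}},'
        '{"share_cnt":{"min":2,"max":0}},{"videos_cnt":{"min":300,"max":0}}]')
rule_dict = {k: v for entry in json.loads(rule) for k, v in entry.items()}
# {"period": {"min": 15, "max": 3}, "duration": {"min": 50, "max": 0}, ...}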

+ 1 - 1
spiders/authorspider.py

@@ -88,7 +88,7 @@ class AuthorSpider(BaseSpider):
         # Parse the user's video list
         data_list = safe_extract(response, self.data_path)
         if not data_list:
-            self.logger.info(f"User {user_uid}: no video data on page {self.current_cursor}")
+            self.logger.info(f"User {user_uid}: no video data on page {self.current_cursor or 0}")
             return None, None
         return has_more, data_list
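
The or 0 fallback only changes the log text. Assuming current_cursor starts as None before the first page is requested (an assumption about AuthorSpider's state), the old message printed a literal None:

# Minimal before/after illustration of the log line:
current_cursor = None
print(f"User 12345, page {current_cursor}: no video data")       # page None
print(f"User 12345, page {current_cursor or 0}: no video data")  # page 0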
 

+ 3 - 0
spiders/basespider.py

@@ -181,6 +181,9 @@ class BaseSpider(ABC):
     async def push_to_etl(self, video: Dict) -> bool:
         try:
             await self.mq_producer.send_msg(video)
+            self.aliyun_log.logging(code="1009",
+                                    message="Pushed to ETL successfully",
+                                    data=video)
             self.logger.info(f"成功推送视频至ETL: {video}")
             return True
         except Exception as e:

+ 1 - 0
spiders/xiaoniangao_author.py

@@ -8,6 +8,7 @@ class XiaoniangaoAuthor(AuthorSpider):
     async def fetch_user_list(self) -> List[Dict]:
         """获取待爬取的用户列表(从数据库)"""
         datas =await self.db_service.get_xng_mid()
+        datas = [{"uid":data["link"]} for data in datas]
         return datas
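
One caveat: get_xng_mid returns 0 rather than an empty list when the query matches nothing, so the comprehension above would raise TypeError on an empty result. A hedged defensive sketch (fetch_user_list_safe is a hypothetical name):

from typing import Dict, List

async def fetch_user_list_safe(db_service) -> List[Dict]:
    # Guard against get_xng_mid() returning 0 when no rows match.
    rows = await db_service.get_xng_mid()
    if not rows:  # covers both 0 and an empty list
        return []
    return [{"uid": row["link"]} for row in rows]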