
Xiaoniangao account

zhangliang, 1 month ago
Commit: 2f4c89884d
5 files changed, 89 insertions(+), 13 deletions(-)
  1. CONFIGURATION.md (+37, -3)
  2. README.md (+7, -1)
  3. config/spiders_config.yaml (+35, -1)
  4. core/utils/helpers.py (+8, -6)
  5. spiders/basespider.py (+2, -2)

CONFIGURATION.md (+37, -3)

@@ -104,11 +104,45 @@ xiaoniangaoauthor:
   request_body:
       cursor: "{{next_cursor}}"
       account_id: "{{uid}}" # uid in the database
+  loop_times: 100
+  loop_interval:
+    min: 5
+    max: 10
+  feishu_sheetid: "K0gA9Y"
+  response_parse:
+    uid: "$.uid" # uid in the database
+    next_cursor: "$.cursor"
+    data: "$.data"
+    has_more: "$.data.has_more"
+    data_path: "$.data.data"
+    fields:
+      video_title: "$.title"
+      duration: "$.du"
+      play_cnt: "$.play_pv"
+      like_cnt: "$.favor.total"
+      comment_cnt: "$.comment_count"
+      share_cnt: "$.share"
+      width: "$.w"
+      height: "$.h"
+      avatar_url: "$.user.hurl"
+      cover_url: "$.url"
+      video_url: "$.v_url"
+      out_user_id: "$.user.mid"
+      out_video_id: "$.vid"
+      publish_time_stamp: "$.t"
+
+xiaoniangaorecommend:
+  platform: xiaoniangao
+  mode: recommend
+  path: crawler/xiao_nian_gao_plus/recommend
+  method: post
+  request_body:
+
   loop_times: 100
   loop_interval:
     min: 5
     max: 20
-  feishu_sheetid: "golXy9"
+  feishu_sheetid: "D1nVxQ"
   response_parse:
     uid: "$.uid" # 数据库的uid
     next_cursor: "$.cursor"
@@ -174,6 +208,6 @@ xiaoniangaoauthor:
 
 ## Current configuration status
 
-- Number of platform configurations: 3
+- Number of platform configurations: 4
 - Runtime environment: prod
-- Config file path: /AutoScraperX/config/spiders_config.yaml
+- Config file path: /Users/zhangliang/Documents/piaoquan/AutoScraperX/config/spiders_config.yaml
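
The `response_parse` block added above maps each output field name to a JSONPath expression that is evaluated against every item found under `data_path`. Below is a minimal sketch of how such a mapping could be applied, assuming a `jsonpath-ng` based extractor and an abbreviated field set; the project's actual parsing code is not part of this diff.

```python
# Sketch only: evaluate a response_parse "fields" mapping against one item from $.data.data.
# Assumes the jsonpath-ng package; AutoScraperX's real extractor may work differently.
from jsonpath_ng import parse

FIELDS = {                      # abbreviated copy of the mapping above
    "video_title": "$.title",
    "duration": "$.du",
    "play_cnt": "$.play_pv",
    "like_cnt": "$.favor.total",
    "out_video_id": "$.vid",
}

def extract_fields(item: dict) -> dict:
    """Return {field_name: value} for a single video item, None when a path has no match."""
    out = {}
    for name, expr in FIELDS.items():
        matches = parse(expr).find(item)
        out[name] = matches[0].value if matches else None
    return out

sample = {"title": "demo", "du": 61, "play_pv": 1200, "favor": {"total": 35}, "vid": "v001"}
print(extract_fields(sample))
# {'video_title': 'demo', 'duration': 61, 'play_cnt': 1200, 'like_cnt': 35, 'out_video_id': 'v001'}
```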

README.md (+7, -1)

@@ -303,4 +303,10 @@ sh deploy.sh
 2. **Generate the latest configuration documentation**:
    ```bash
    python -m core.utils.config_documentation
-   ```
+   ```
+### Project-related notes
+1. **Write to the safe-title sheet**:
+   ```
+   https://w42nne6hzg.feishu.cn/sheets/U5dXsSlPOhiNNCtEfgqcm1iYnpf?sheet=K0gA9Y
+   ```
+   

config/spiders_config.yaml (+35, -1)

@@ -68,7 +68,7 @@ xiaoniangaoauthor:
   loop_times: 100
   loop_interval:
     min: 5
-    max: 20
+    max: 10
   feishu_sheetid: "K0gA9Y"
   response_parse:
     uid: "$.uid" # 数据库的uid
@@ -91,6 +91,40 @@ xiaoniangaoauthor:
       out_user_id: "$.user.mid"
       out_video_id: "$.vid"
       publish_time_stamp: "$.t"
+#
+#xiaoniangaorecommend:
+#  platform: xiaoniangao
+#  mode: recommend
+#  path: crawler/xiao_nian_gao_plus/recommend
+#  method: post
+#  request_body:
+#
+#  loop_times: 100
+#  loop_interval:
+#    min: 5
+#    max: 20
+#  feishu_sheetid: "D1nVxQ"
+#  response_parse:
+#    uid: "$.uid" # uid in the database
+#    next_cursor: "$.cursor"
+#    data: "$.data"
+#    has_more: "$.data.has_more"
+#    data_path: "$.data.data"
+#    fields:
+#      video_title: "$.title"
+#      duration: 0
+#      play_cnt: "$.play_pv"
+#      like_cnt: 0
+#      comment_cnt: "$.comment_count"
+#      share_cnt: "$.share"
+#      width: "$.w"
+#      height: "$.h"
+#      avatar_url: "$.user.hurl"
+#      cover_url: "$.url"
+#      video_url: "$.v_url"
+#      out_user_id:
+#      out_video_id: "$.id"
+#      publish_time_stamp: "$.t"
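
For the `loop_times` / `loop_interval` values touched above (the author config now waits at most 10 s between rounds), here is a rough sketch of how these settings are presumably consumed. It assumes the spider name is a top-level YAML key, as it appears in this diff; the real crawl loop lives in `BaseSpider` and is not shown here.

```python
# Sketch only: illustrative reading of loop_times / loop_interval from spiders_config.yaml.
# The actual loop implementation in BaseSpider may differ.
import random
import time

import yaml  # PyYAML

with open("config/spiders_config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

spider_cfg = config["xiaoniangaoauthor"]
interval = spider_cfg["loop_interval"]            # {"min": 5, "max": 10} after this commit

for _ in range(spider_cfg["loop_times"]):         # 100 crawl rounds
    # ... fetch one page, parse it via response_parse, push results ...
    time.sleep(random.uniform(interval["min"], interval["max"]))  # randomized delay between rounds
```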
 
 
 

core/utils/helpers.py (+8, -6)

@@ -20,15 +20,16 @@ async def get_title_filter_word() -> List[str]:
         return feishu_data[1]
 
 async def generate_titles(sheet_id: str,video_obj: Dict,logger,aliyun_log):
-    title_list = await get_title_filter_word()
-    title = video_obj.get("title")
+    title_filter_word = await get_title_filter_word()
+    title = video_obj.get("video_title")
     if not title:
-        return
-    contains_keyword = any(keyword in title for keyword in title_list)
+        return video_obj
+    contains_keyword = any(keyword in title for keyword in title_filter_word)
     logger.info(f"【{title}】标题包含过滤关键词:{contains_keyword}")
     if contains_keyword:
         new_title = await GPT4oMini.get_ai_mini_title(title)
         logger.info(f"生成新的标题:{new_title}")
+        video_obj["video_title"] = new_title
         current_time = datetime.now()
         formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
         values = [
@@ -39,6 +40,7 @@ async def generate_titles(sheet_id: str,video_obj: Dict,logger,aliyun_log):
                 formatted_time,
         ]
         await insert_safe_data(sheet_id, values)
+    return video_obj
 
 async def insert_safe_data(sheet_id: str, values: List):
     spreadsheet_token = "U5dXsSlPOhiNNCtEfgqcm1iYnpf"
@@ -49,5 +51,5 @@ async def insert_safe_data(sheet_id: str, values: List):
 
 
 if __name__ == '__main__':
-     filter_word = asyncio.run(get_title_filter_word())
-     print(filter_word)
+     asyncio.run(insert_safe_data("K0gA9Y", ["1","2","3","4","5"]))
+    
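
After this change `generate_titles` returns the video dict (with `video_title` replaced when a filter keyword is hit) instead of returning `None`, and it reads the `video_title` key rather than `title`. A minimal caller sketch under those assumptions follows; the plain `logging` logger and the `None` passed for `aliyun_log` are stand-ins, and a real run still needs Feishu and GPT-4o mini access.

```python
# Sketch only: exercising the new generate_titles contract from core/utils/helpers.py.
# logger and aliyun_log below are illustrative stand-ins; real calls hit Feishu / GPT-4o mini.
import asyncio
import logging

from core.utils.helpers import generate_titles

async def main():
    video_obj = {"video_title": "original title"}   # note: the key is video_title, not title
    video_obj = await generate_titles("K0gA9Y", video_obj, logging.getLogger("demo"), None)
    print(video_obj["video_title"])                 # rewritten if a filter keyword matched

asyncio.run(main())
```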

spiders/basespider.py (+2, -2)

@@ -140,7 +140,7 @@ class BaseSpider(ABC):
                 return False
             if not await self.filter_data(video_obj):
                 return False
-            await self.integrated_video_handling(video_obj)
+            video_obj = await self.integrated_video_handling(video_obj)
             return await self.push_to_etl(video_obj)
         except Exception as e:
             self.logger.exception(f"视频处理异常: {e}")
@@ -206,7 +206,7 @@ class BaseSpider(ABC):
          Hook: automatic title generation or other business logic can be implemented here
         """
        # video title processing / generation
-        await generate_titles(self.feishu_sheetid, video,self.logger,self.aliyun_log)
+        return await generate_titles(self.feishu_sheetid, video,self.logger,self.aliyun_log)
 
     async def push_to_etl(self, video: Dict) -> bool:
         try:
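
Taken together with the helpers.py change, these two one-line edits make the hook contract explicit: `integrated_video_handling` hands back the (possibly retitled) dict and `process_video` keeps it before pushing to ETL. The sketch below illustrates the pattern; the method names mirror `spiders/basespider.py`, but the bodies are simplified stand-ins, not the real implementation.

```python
# Sketch only: why process_video reassigns the hook's return value.
# In the real BaseSpider the hook delegates to generate_titles(), which may replace video_title.
from typing import Dict

class SpiderSketch:
    async def integrated_video_handling(self, video: Dict) -> Dict:
        video["video_title"] = video["video_title"].strip()  # stand-in for title rewriting
        return video

    async def push_to_etl(self, video: Dict) -> bool:
        print("pushing:", video["video_title"])
        return True

    async def process_video(self, video_obj: Dict) -> bool:
        # Keeping the returned dict makes the contract explicit; in-place mutation alone
        # would also work today, but the return value documents that the hook may rewrite fields.
        video_obj = await self.integrated_video_handling(video_obj)
        return await self.push_to_etl(video_obj)

if __name__ == "__main__":
    import asyncio
    asyncio.run(SpiderSketch().process_video({"video_title": " demo "}))
```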