zhangyong vor 9 Monaten
Ursprung
Commit
c8d534b1e2

+ 2 - 1
application/common/feishu/__init__.py

@@ -1,2 +1,3 @@
 from .feishu import Feishu
-from .feishu_insert import FeishuInsert
+from .feishu_insert import FeishuInsert
+from .feishu_data import FsData

+ 20 - 0
application/common/feishu/feishu_data.py

@@ -0,0 +1,20 @@
+from application.common.feishu.feishu_utils import FeishuUtils
+
+
+class FsData:
+    """Reads shared crawler settings through ``FeishuUtils``."""
+
+    def get_title_rule(self):
+        """
+        Return the title-rule string from the rule sheet.
+
+        The first row returned by ``FeishuUtils.get_values_batch`` is skipped
+        (presumably a header row - confirm against the sheet). The first
+        non-empty value in the first column of the remaining rows is returned.
+        Callers in this commit split the result on "," to get keywords.
+
+        :return: the rule string, or None when no row has a value
+        """
+        summary = FeishuUtils.get_values_batch("KsoMsyP2ghleM9tzBfmcEEXBnXg", "BS9uyu")
+        # Nothing came back: report "no rule" instead of failing on the slice.
+        if not summary:
+            return None
+        for row in summary[1:]:
+            # Keep scanning past blank rows/cells rather than giving up on the
+            # first one; an empty row must not raise IndexError either.
+            if row and row[0]:
+                return row[0]
+        return None
+
+
+if __name__ == '__main__':
+    # Manual check: fetch the title rule once and print it.
+    print(FsData().get_title_rule())

+ 0 - 4
application/common/feishu/feishu_utils.py

@@ -396,7 +396,3 @@ class FeishuUtils:
         except Exception as e:
             logger.error(f"bot异常:{e}\n")
 
-
-if __name__ == "__main__":
-    Feishu.bot('recommend', '抖音', '测试: 抖音cookie失效,请及时更换')
-

+ 1 - 0
application/common/gpt/__init__.py

@@ -0,0 +1 @@
+from .gpt4o_mini_help import GPT4oMini

+ 57 - 0
application/common/gpt/gpt4o_mini_help.py

@@ -0,0 +1,57 @@
+import json
+
+import requests
+class GPT4oMini:
+    """Helper that asks the AIGC HTTP endpoint (gpt-4o-mini) to rewrite video titles."""
+
+    @classmethod
+    def get_ai_mini_title(cls, title, timeout=60):
+        """
+        Ask the GPT endpoint for a rewritten title.
+
+        :param title: original video title, interpolated into the prompt
+        :param timeout: seconds passed to ``requests.post``; without a timeout
+            a stalled connection would block the calling crawler indefinitely
+        :return: the "新标题" field of the response, or None when the request
+            fails or the response cannot be parsed
+        """
+        url = "http://aigc-api.cybertogether.net//aigc/dev/test/gpt"
+        payload = json.dumps({
+            "imageList": [],
+            "model": "gpt-4o-mini-2024-07-18",
+            "prompt": (
+            "针对微信平台视频类小程序场景"
+            "面向人群是中国中老年人,在单聊、群聊场景。为视频生成一个吸引人的标题。每次生成我会提供一个原标题,你通过规则生成一个新的标题。"
+            "生成规则:"
+            "a.生成的新标题一定不能包含以下风险词。 风险词:“看看”、“全体”、“一定”、“所以人”、“无数人”、“值得一看”、“值得一听”、“99 % ”、“震撼”、“必”、“必看”、“必听”、“必读”、“全场”、“听听”、“一起听听”、“一起”、“快看”、“快来”、“分享”、“转发”、“都看看吧”、“都来”"
+            "b.新标题字符不小于15个字,不超过30个字。"
+            "c.新标题最前面或最后面必须加上emoij符号。如“🔴”、“⭕️”、“🚩”、“🔥”、“💖”"
+            "d.新标题最好只去掉原标题里的低质词,其他句子、语句都保持不变"
+            "e.去掉低质词后,根据语意适当加字句,使新标题整句读起来简洁、通顺、有吸引力、并准确反映视频核心内容"
+            "请严格按照上述规则,生成对应的新标题。"
+            f"请分析该标题,标题为:{title},返回新的标题。"
+            ),
+            # Structured output: the model must answer with a JSON object
+            # holding exactly one string field, "新标题".
+            "responseFormat": {
+                "type": "json_schema",
+                "json_schema": {
+                    "strict": True,
+                    "name": "share_script_result",
+                    "schema": {
+                        "type": "object",
+                        "properties": {
+                            "新标题": {
+                                "type": "string",
+                                "description": "生成新的标题"
+                            }
+                        },
+                        "required": ["新标题"],
+                        "additionalProperties": False
+                    }
+                }
+            }
+        })
+        headers = {'Content-Type': 'application/json'}
+        try:
+            response = requests.post(url, headers=headers, data=payload, timeout=timeout)
+            response_data = response.json()
+
+            # The 'data' field is itself a JSON-encoded string, hence the
+            # second decode.
+            data = json.loads(response_data.get('data', '{}'))
+            return data["新标题"]
+        except Exception:
+            # Best effort: callers treat None as "keep the original title".
+            return None
+
+if __name__ == '__main__':
+    # Manual check: rewrite one sample title and print the result.
+    sample_title = "🔴这位美女说的太好了!这就是我们的大中国"
+    print(GPT4oMini.get_ai_mini_title(sample_title))

+ 29 - 2
spider/crawler_online/benshanzhufu.py

@@ -11,6 +11,9 @@ import cv2
 import requests
 
 from application.common import Feishu
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 
 sys.path.append(os.getcwd())
 
@@ -58,6 +61,8 @@ class BSZHRecommend(object):
         """
         url = "http://8.217.192.46:8889/crawler/ben_shan_zhu_fu/recommend"
         next_cursor = 1
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         for i in range(1, 200):
             payload = json.dumps({
                 "cursor": f"{next_cursor}"
@@ -85,7 +90,7 @@ class BSZHRecommend(object):
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
                     next_cursor = response['data']['next_cursor']
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj, title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -97,7 +102,7 @@ class BSZHRecommend(object):
                     return
                 time.sleep(random.randint(5, 10))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -131,6 +136,28 @@ class BSZHRecommend(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
+            title_list = title_rule.split(",")
+            title = video_obj["title"]
+            contains_keyword = any(keyword in title for keyword in title_list)
+            if contains_keyword:
+                new_title = GPT4oMini.get_ai_mini_title(title)
+                if new_title:
+                    item.add_video_info("video_title", new_title)
+                    current_time = datetime.now()
+                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                    values = [
+                        [
+                            video_obj["video_url"],
+                            video_obj["video_cover"],
+                            title,
+                            new_title,
+                            formatted_time,
+                        ]
+                    ]
+                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "aTSJH4", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "aTSJH4", "A2:Z2", values)
+
             self.download_cnt += 1
             self.mq.send_msg(mq_obj)
             self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 29 - 3
spider/crawler_online/haoyunzhufuduo.py

@@ -4,11 +4,14 @@ import sys
 import time
 import uuid
 import json
-
+from datetime import datetime
 
 import cv2
 import requests
 
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 from application.common.mysql.sql import Sql
 from application.common.redis.xng_redis import xng_in_video_data
 
@@ -52,6 +55,8 @@ class HYZFDfRecommend(object):
             'Content-Type': 'application/json'
         }
         url = "http://8.217.192.46:8889/crawler/hao_yun_zhu_fu_duo/recommend"
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         while True:
             payload = json.dumps({
                 "cursor": ""
@@ -72,7 +77,7 @@ class HYZFDfRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj, title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -84,7 +89,7 @@ class HYZFDfRecommend(object):
                     return
                 time.sleep(random.randint(1, 5))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -119,6 +124,27 @@ class HYZFDfRecommend(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
+            title_list = title_rule.split(",")
+            title = video_obj["title"]
+            contains_keyword = any(keyword in title for keyword in title_list)
+            if contains_keyword:
+                new_title = GPT4oMini.get_ai_mini_title(title)
+                if new_title:
+                    item.add_video_info("video_title", new_title)
+                    current_time = datetime.now()
+                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                    values = [
+                        [
+                            video_obj["videoPath"],
+                            video_obj["coverImagePath"],
+                            title,
+                            new_title,
+                            formatted_time,
+                        ]
+                    ]
+                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "V36GHT", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "V36GHT", "A2:Z2", values)
             self.download_cnt += 1
             self.mq.send_msg(mq_obj)
             self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 29 - 3
spider/crawler_online/jierizhufuhuakaifugui.py

@@ -4,11 +4,14 @@ import sys
 import time
 import uuid
 import json
-
+from datetime import datetime
 
 import cv2
 import requests
 
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 from application.common.mysql.sql import Sql
 from application.common.redis.xng_redis import xng_in_video_data
 
@@ -53,6 +56,8 @@ class JRZFHKFGRecommend(object):
         }
         url = "http://8.217.192.46:8889/crawler/jie_ri_zhu_fu_hua_kai_fu_gui/recommend"
         next_cursor = None
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         while True:
             payload = json.dumps({
                 "cursor": next_cursor
@@ -74,7 +79,7 @@ class JRZFHKFGRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj, title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -86,7 +91,7 @@ class JRZFHKFGRecommend(object):
                     return
                 time.sleep(random.randint(1, 5))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -121,6 +126,27 @@ class JRZFHKFGRecommend(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
+            title_list = title_rule.split(",")
+            title = video_obj["title"]
+            contains_keyword = any(keyword in title for keyword in title_list)
+            if contains_keyword:
+                new_title = GPT4oMini.get_ai_mini_title(title)
+                if new_title:
+                    item.add_video_info("video_title", new_title)
+                    current_time = datetime.now()
+                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                    values = [
+                        [
+                            video_obj["video_url"],
+                            video_obj["video_cover"],
+                            title,
+                            new_title,
+                            formatted_time,
+                        ]
+                    ]
+                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "XM11ep", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "XM11ep", "A2:Z2", values)
             self.download_cnt += 1
             self.mq.send_msg(mq_obj)
             self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 29 - 3
spider/crawler_online/lepaoledong.py

@@ -4,11 +4,14 @@ import sys
 import time
 import uuid
 import json
-
+from datetime import datetime
 
 import cv2
 import requests
 
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 from application.common.mysql.sql import Sql
 from application.common.redis.xng_redis import xng_in_video_data
 
@@ -53,6 +56,8 @@ class LPLDRecommend(object):
         }
         url = "http://8.217.192.46:8889/crawler/le_pao_da_run/recommend"
         # url = "http://8.217.192.46:8889/crawler/le_pao_le_dong/recommend"
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         while True:
             payload = json.dumps({
                 "cursor": ""
@@ -73,7 +78,7 @@ class LPLDRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj, title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -85,7 +90,7 @@ class LPLDRecommend(object):
                     return
                 time.sleep(random.randint(1, 5))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -120,6 +125,27 @@ class LPLDRecommend(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
+            title_list = title_rule.split(",")
+            title = video_obj["title"]
+            contains_keyword = any(keyword in title for keyword in title_list)
+            if contains_keyword:
+                new_title = GPT4oMini.get_ai_mini_title(title)
+                if new_title:
+                    item.add_video_info("video_title", new_title)
+                    current_time = datetime.now()
+                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                    values = [
+                        [
+                            video_obj["urls"][0],
+                            video_obj["cover_url"],
+                            title,
+                            new_title,
+                            formatted_time,
+                        ]
+                    ]
+                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "rcQv7r", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "rcQv7r", "A2:Z2", values)
             self.download_cnt += 1
             self.mq.send_msg(mq_obj)
             self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 29 - 3
spider/crawler_online/lepaoledongdijie.py

@@ -4,11 +4,14 @@ import sys
 import time
 import uuid
 import json
-
+from datetime import datetime
 
 import cv2
 import requests
 
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 from application.common.mysql.sql import Sql
 from application.common.redis.xng_redis import xng_in_video_data
 
@@ -53,6 +56,8 @@ class LPLDDJRecommend(object):
         }
         url = "http://8.217.192.46:8889/crawler/le_pao_le_dong_di_jie/recommend"
         # url = "http://8.217.192.46:8889/crawler/le_pao_le_dong/recommend"
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         while True:
             payload = json.dumps({
                 "cursor": ""
@@ -73,7 +78,7 @@ class LPLDDJRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj, title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -85,7 +90,7 @@ class LPLDDJRecommend(object):
                     return
                 time.sleep(random.randint(1, 5))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -120,6 +125,27 @@ class LPLDDJRecommend(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
+            title_list = title_rule.split(",")
+            title = video_obj["title"]
+            contains_keyword = any(keyword in title for keyword in title_list)
+            if contains_keyword:
+                new_title = GPT4oMini.get_ai_mini_title(title)
+                if new_title:
+                    item.add_video_info("video_title", new_title)
+                    current_time = datetime.now()
+                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                    values = [
+                        [
+                            video_obj["urls"][0],
+                            video_obj["cover_url"],
+                            title,
+                            new_title,
+                            formatted_time,
+                        ]
+                    ]
+                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "fjtrYF", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "fjtrYF", "A2:Z2", values)
             self.download_cnt += 1
             self.mq.send_msg(mq_obj)
             self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 29 - 3
spider/crawler_online/linglingkuailezhufu.py

@@ -4,11 +4,14 @@ import sys
 import time
 import uuid
 import json
-
+from datetime import datetime
 
 import cv2
 import requests
 
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 from application.common.mysql.sql import Sql
 from application.common.redis.xng_redis import xng_in_video_data
 
@@ -53,6 +56,8 @@ class LlklzfRecommend(object):
         }
         url = "http://8.217.192.46:8889/crawler/ling_ling_kuai_le_zhu_fu/recommend"
         next_cursor = None
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         while True:
             payload = json.dumps({
                 "cursor": next_cursor
@@ -74,7 +79,7 @@ class LlklzfRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj, title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -86,7 +91,7 @@ class LlklzfRecommend(object):
                     return
                 time.sleep(random.randint(1, 5))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -121,6 +126,27 @@ class LlklzfRecommend(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
+            title_list = title_rule.split(",")
+            title = video_obj["title"]
+            contains_keyword = any(keyword in title for keyword in title_list)
+            if contains_keyword:
+                new_title = GPT4oMini.get_ai_mini_title(title)
+                if new_title:
+                    item.add_video_info("video_title", new_title)
+                    current_time = datetime.now()
+                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                    values = [
+                        [
+                            video_obj["video_path"],
+                            video_obj["cover_image"],
+                            title,
+                            new_title,
+                            formatted_time,
+                        ]
+                    ]
+                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "m9KG95", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    # Fill the row just inserted: same spreadsheet token and
+                    # sheet id as the insert_columns call above.
+                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "m9KG95", "A2:Z2", values)
             self.download_cnt += 1
             self.mq.send_msg(mq_obj)
             self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 29 - 4
spider/crawler_online/piaoquangushi.py

@@ -4,11 +4,14 @@ import sys
 import time
 import uuid
 import json
-
+from datetime import datetime
 
 import cv2
 import requests
 
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 from application.common.mysql.sql import Sql
 from application.common.redis.xng_redis import xng_in_video_data
 
@@ -53,7 +56,8 @@ class PQGSRecommend(object):
         }
         cursor  = ""
         url = "http://8.217.192.46:8889/crawler/piao_quan_gu_shi/recommend"
-
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         while True:
             payload = json.dumps({
                 "cursor": cursor
@@ -75,7 +79,7 @@ class PQGSRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj,title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -87,7 +91,7 @@ class PQGSRecommend(object):
                     return
                 time.sleep(random.randint(1, 5))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj,title_rule):
         """
         处理视频
         :param video_obj:
@@ -124,6 +128,27 @@ class PQGSRecommend(object):
                 trace_id=trace_id,
             )
             if pipeline.process_item():
+                title_list = title_rule.split(",")
+                title = video_obj["title"]
+                contains_keyword = any(keyword in title for keyword in title_list)
+                if contains_keyword:
+                    new_title = GPT4oMini.get_ai_mini_title(title)
+                    if new_title:
+                        item.add_video_info("video_title", new_title)
+                        current_time = datetime.now()
+                        formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                        values = [
+                            [
+                                video_url,
+                                video_obj["cover"],
+                                title,
+                                new_title,
+                                formatted_time,
+                            ]
+                        ]
+                        FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "8c7191", "ROWS", 1, 2)
+                        time.sleep(0.5)
+                        FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "8c7191", "A2:Z2", values)
                 self.download_cnt += 1
                 self.mq.send_msg(mq_obj)
                 self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 28 - 2
spider/crawler_online/tiantianjufuqi.py

@@ -10,6 +10,9 @@ from datetime import datetime
 import requests
 
 from application.common import Feishu
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 
 sys.path.append(os.getcwd())
 
@@ -62,6 +65,8 @@ class TTJFFQRecommend(object):
             'referer': 'https://servicewechat.com/wxa12a841184757478/7/page-frame.html',
             'accept-language': 'zh-CN,zh;q=0.9'
         }
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         while True:
             time.sleep(random.randint(1, 10))
             url = "https://api.xinghetime.com/luckvideo/video/getRecommendVideos"
@@ -82,7 +87,7 @@ class TTJFFQRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj,title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -94,7 +99,7 @@ class TTJFFQRecommend(object):
                 return
             time.sleep(random.randint(5, 10))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -143,6 +148,27 @@ class TTJFFQRecommend(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
+            title_list = title_rule.split(",")
+            title = video_obj["title"]
+            contains_keyword = any(keyword in title for keyword in title_list)
+            if contains_keyword:
+                new_title = GPT4oMini.get_ai_mini_title(title)
+                if new_title:
+                    item.add_video_info("video_title", new_title)
+                    current_time = datetime.now()
+                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                    values = [
+                        [
+                            video_obj["videoPath"],
+                            video_obj["coverImagePath"],
+                            title,
+                            new_title,
+                            formatted_time,
+                        ]
+                    ]
+                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "ftQdRy", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "ftQdRy", "A2:Z2", values)
             self.download_cnt += 1
             self.mq.send_msg(mq_obj)
             self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 29 - 3
spider/crawler_online/weiquanshipin.py

@@ -4,11 +4,14 @@ import sys
 import time
 import uuid
 import json
-
+from datetime import datetime
 
 import cv2
 import requests
 
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 from application.common.mysql.sql import Sql
 from application.common.redis.xng_redis import xng_in_video_data
 
@@ -53,6 +56,8 @@ class WQSPRecommend(object):
         }
         cursor  = ""
         url = "http://8.217.192.46:8889/crawler/wei_quan_shi_pin/recommend"
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         while True:
             payload = json.dumps({
                 "cursor": cursor
@@ -74,7 +79,7 @@ class WQSPRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj, title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -86,7 +91,7 @@ class WQSPRecommend(object):
                     return
                 time.sleep(random.randint(1, 5))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -124,6 +129,27 @@ class WQSPRecommend(object):
                 trace_id=trace_id,
             )
             if pipeline.process_item():
+                title_list = title_rule.split(",")
+                title = video_obj["vtitle"]
+                contains_keyword = any(keyword in title for keyword in title_list)
+                if contains_keyword:
+                    new_title = GPT4oMini.get_ai_mini_title(title)
+                    if new_title:
+                        item.add_video_info("video_title", new_title)
+                        current_time = datetime.now()
+                        formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                        values = [
+                            [
+                                video_url,
+                                cover_url,
+                                title,
+                                new_title,
+                                formatted_time,
+                            ]
+                        ]
+                        FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "lluIcc", "ROWS", 1, 2)
+                        time.sleep(0.5)
+                        FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "lluIcc", "A2:Z2", values)
                 self.download_cnt += 1
                 self.mq.send_msg(mq_obj)
                 self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 29 - 3
spider/crawler_online/xiaoniangaotuijianliu.py

@@ -4,11 +4,14 @@ import sys
 import time
 import uuid
 import json
-
+from datetime import datetime
 
 import cv2
 import requests
 
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 from application.common.mysql.sql import Sql
 from application.common.redis.xng_redis import xng_in_video_data
 
@@ -59,6 +62,8 @@ class XNGTJLRecommend(object):
         headers = {
             'Content-Type': 'application/json'
         }
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         for i in range(3):
             url = "http://8.217.192.46:8889/crawler/xiao_nian_gao_plus/recommend"
             payload = json.dumps({})
@@ -75,7 +80,7 @@ class XNGTJLRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj, title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -87,7 +92,7 @@ class XNGTJLRecommend(object):
                     return
                 time.sleep(random.randint(5, 10))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -144,6 +149,27 @@ class XNGTJLRecommend(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
+            title_list = title_rule.split(",")
+            title = video_obj["title"]
+            contains_keyword = any(keyword in title for keyword in title_list)
+            if contains_keyword:
+                new_title = GPT4oMini.get_ai_mini_title(title)
+                if new_title:
+                    item.add_video_info("video_title", new_title)
+                    current_time = datetime.now()
+                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                    values = [
+                        [
+                            video_obj["v_url"],
+                            video_obj["url"],
+                            title,
+                            new_title,
+                            formatted_time,
+                        ]
+                    ]
+                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "D1nVxQ", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "D1nVxQ", "A2:Z2", values)
             self.download_cnt += 1
             self.mq.send_msg(mq_obj)
             self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 29 - 3
spider/crawler_online/yuannifuqimanman.py

@@ -4,11 +4,14 @@ import sys
 import time
 import uuid
 import json
-
+from datetime import datetime
 
 import cv2
 import requests
 
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 from application.common.mysql.sql import Sql
 from application.common.redis.xng_redis import xng_in_video_data
 
@@ -53,6 +56,8 @@ class YNFQMMRecommend(object):
         }
         url = "http://8.217.192.46:8889/crawler/yuan_ni_fu_qi_man_man/recommend"
         next_cursor = None
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         while True:
             payload = json.dumps({
                 "cursor": next_cursor
@@ -74,7 +79,7 @@ class YNFQMMRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj, title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -86,7 +91,7 @@ class YNFQMMRecommend(object):
                     return
                 time.sleep(random.randint(1, 5))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -121,6 +126,27 @@ class YNFQMMRecommend(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
+            title_list = title_rule.split(",")
+            title = video_obj["title"]
+            contains_keyword = any(keyword in title for keyword in title_list)
+            if contains_keyword:
+                new_title = GPT4oMini.get_ai_mini_title(title)
+                if new_title:
+                    item.add_video_info("video_title", new_title)
+                    current_time = datetime.now()
+                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                    values = [
+                        [
+                            video_obj["video_url"],
+                            video_obj["video_cover"],
+                            title,
+                            new_title,
+                            formatted_time,
+                        ]
+                    ]
+                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "golXy9", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "golXy9", "A2:Z2", values)
             self.download_cnt += 1
             self.mq.send_msg(mq_obj)
             self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 29 - 4
spider/crawler_online/zhufukuaizhuan.py

@@ -4,11 +4,14 @@ import sys
 import time
 import uuid
 import json
-
+from datetime import datetime
 
 import cv2
 import requests
 
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 from application.common.mysql.sql import Sql
 from application.common.redis.xng_redis import xng_in_video_data
 
@@ -54,7 +57,8 @@ class ZFKZRecommend(object):
         cursor  = ""
         url = "http://8.217.192.46:8889/crawler/zhu_fu_kuai_zhuan/recommend"
 
-
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         while True:
             payload = json.dumps({
                 "cursor": cursor
@@ -76,7 +80,7 @@ class ZFKZRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj, title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -88,7 +92,7 @@ class ZFKZRecommend(object):
                     return
                 time.sleep(random.randint(1, 5))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -123,6 +127,27 @@ class ZFKZRecommend(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
+            title_list = title_rule.split(",")
+            title = video_obj["title"]
+            contains_keyword = any(keyword in title for keyword in title_list)
+            if contains_keyword:
+                new_title = GPT4oMini.get_ai_mini_title(title)
+                if new_title:
+                    item.add_video_info("video_title", new_title)
+                    current_time = datetime.now()
+                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                    values = [
+                        [
+                            video_obj["video_url"],
+                            video_obj["video_cover"],
+                            title,
+                            new_title,
+                            formatted_time,
+                        ]
+                    ]
+                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "qRls74", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "qRls74", "A2:Z2", values)
             self.download_cnt += 1
             self.mq.send_msg(mq_obj)
             self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 29 - 4
spider/crawler_online/zhufuniannianshunxinjixiang.py

@@ -4,11 +4,14 @@ import sys
 import time
 import uuid
 import json
-
+from datetime import datetime
 
 import cv2
 import requests
 
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 from application.common.mysql.sql import Sql
 from application.common.redis.xng_redis import xng_in_video_data
 
@@ -53,7 +56,8 @@ class ZFNNSXJXRecommend(object):
         }
         cursor  = ""
         url = "http://8.217.192.46:8889/crawler/xi_que_bo/recommend"
-
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         while True:
             payload = json.dumps({
                 "cursor": cursor
@@ -75,7 +79,7 @@ class ZFNNSXJXRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj, title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -87,7 +91,7 @@ class ZFNNSXJXRecommend(object):
                     return
                 time.sleep(random.randint(1, 5))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -122,6 +126,27 @@ class ZFNNSXJXRecommend(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
+            title_list = title_rule.split(",")
+            title = video_obj["title"]
+            contains_keyword = any(keyword in title for keyword in title_list)
+            if contains_keyword:
+                new_title = GPT4oMini.get_ai_mini_title(title)
+                if new_title:
+                    item.add_video_info("video_title", new_title)
+                    current_time = datetime.now()
+                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                    values = [
+                        [
+                            video_obj["video_url"],
+                            video_obj["images"],
+                            title,
+                            new_title,
+                            formatted_time,
+                        ]
+                    ]
+                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "WikgoU", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "WikgoU", "A2:Z2", values)
             self.download_cnt += 1
             self.mq.send_msg(mq_obj)
             self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)

+ 29 - 3
spider/crawler_online/zhufuquanzituijianliu.py

@@ -4,11 +4,14 @@ import sys
 import time
 import uuid
 import json
-
+from datetime import datetime
 
 import cv2
 import requests
 
+from application.common.feishu import FsData
+from application.common.feishu.feishu_utils import FeishuUtils
+from application.common.gpt import GPT4oMini
 from application.common.mysql.sql import Sql
 from application.common.redis.xng_redis import xng_in_video_data
 
@@ -58,6 +61,8 @@ class ZFQZTJLRecommend(object):
         headers = {
             'Content-Type': 'application/json'
         }
+        data_rule = FsData()
+        title_rule = data_rule.get_title_rule()
         for i in range(2):
             url = "http://8.217.192.46:8889/crawler/zhu_fu_quan_zi/recommend"
             payload = json.dumps({})
@@ -74,7 +79,7 @@ class ZFQZTJLRecommend(object):
                     self.aliyun_log.logging(
                         code="1001", message="扫描到一条视频", data=video_obj
                     )
-                    self.process_video_obj(video_obj)
+                    self.process_video_obj(video_obj,title_rule)
                 except Exception as e:
                     self.aliyun_log.logging(
                         code="3000",
@@ -86,7 +91,7 @@ class ZFQZTJLRecommend(object):
                     return
                 time.sleep(random.randint(5, 10))
 
-    def process_video_obj(self, video_obj):
+    def process_video_obj(self, video_obj, title_rule):
         """
         处理视频
         :param video_obj:
@@ -144,6 +149,27 @@ class ZFQZTJLRecommend(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
+            title_list = title_rule.split(",")
+            title = video_obj["title"]
+            contains_keyword = any(keyword in title for keyword in title_list)
+            if contains_keyword:
+                new_title = GPT4oMini.get_ai_mini_title(title)
+                if new_title:
+                    item.add_video_info("video_title", new_title)
+                    current_time = datetime.now()
+                    formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                    values = [
+                        [
+                            video_obj["v_url"],
+                            video_obj["url"],
+                            title,
+                            new_title,
+                            formatted_time,
+                        ]
+                    ]
+                    FeishuUtils.insert_columns("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "GVottu", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    FeishuUtils.update_values("U5dXsSlPOhiNNCtEfgqcm1iYnpf", "GVottu", "A2:Z2", values)
             self.download_cnt += 1
             self.mq.send_msg(mq_obj)
             self.aliyun_log.logging(code="1002", message="成功发送至 ETL", data=mq_obj)