wangkun 1 year ago
parent
commit
08f0f609a9

+ 40 - 0
README.MD

@@ -161,4 +161,44 @@ ps aux | grep xigua | grep -v grep | awk '{print $2}' | xargs kill -9
 ps aux | grep kuaishou | grep -v grep | awk '{print $2}' | xargs kill -9
 ps aux | grep douyin | grep -v grep | awk '{print $2}' | xargs kill -9
 ps aux | grep xiaoniangao | grep -v grep | awk '{print $2}' | xargs kill -9
+```
+
+#### 生成 requirements.txt
+```commandline
+cd ./piaoquan_crawler && pipreqs ./ --force
+
+# pip3 install Appium-Python-Client
+Appium_Python_Client==2.10.1
+# 翻墙, pip3 install git+https://github.com/pyatom/pyatom/
+atomac==1.2.0
+# pip3 install ffmpeg-python
+ffmpeg==1.4
+# pip3 install loguru
+loguru==0.6.0
+# pip3 install lxml
+lxml==4.9.1
+# pip3 install mq_http_sdk, 若您使用的SDK版本为v1.0.0,您需要安装大于等于2.5且小于3.0版本的Python。若您使用的SDK版本大于v1.0.0,您需要安装2.5及以上版本的Python。
+mq_http_sdk==1.0.3
+# sudo pip3 install oss2
+oss2==2.15.0
+# pip3 install psutil
+psutil==5.9.2
+# pip3 install PyExecJS
+PyExecJS==1.5.1
+# pip3 install PyMysql
+PyMySQL==1.0.2
+# pip3 install redis
+redis==4.5.1
+# pip3 install requests
+requests==2.27.1
+# pip3 install selenium
+selenium==4.9.1
+# pip3 install urllib3
+urllib3==1.26.9
+# pip3 install jieba
+jieba==0.42.1
+# pip3 install workalendar
+workalendar==17.0.0
+# pip3 install aliyun_python_sdk
+aliyun_python_sdk==2.2.0
 ```
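The section above lists the project's pinned dependencies package by package; as a follow-up sketch (an assumption, not part of the original README), the whole set can be installed in one step from requirements.txt:

```commandline
# assumption: install every pinned dependency at once after regenerating requirements.txt
cd ./piaoquan_crawler && pip3 install -r requirements.txt
```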

+ 58 - 1
common/common.py

@@ -4,6 +4,7 @@
 """
 公共方法,包含:生成log / 删除log / 下载方法 / 删除 weixinzhishu_chlsfiles / 过滤词库 / 保存视频信息至本地 txt / 翻译 / ffmpeg
 """
+from aliyun.log import LogClient, PutLogsRequest, LogItem
 from datetime import date, timedelta
 from loguru import logger
 from hashlib import md5
@@ -55,6 +56,63 @@ class Common:
 
 
         return logger

+    # 写入阿里云日志
+    @staticmethod
+    def logging(log_type, crawler, env, message):
+        """
+        写入阿里云日志
+        测试库: https://sls.console.aliyun.com/lognext/project/crawler-log-dev/logsearch/crawler-log-dev
+        正式库: https://sls.console.aliyun.com/lognext/project/crawler-log-prod/logsearch/crawler-log-prod
+        :param log_type: 爬虫策略
+        :param crawler: 哪款爬虫
+        :param env: 环境
+        :param message:日志内容
+        :return: None
+        """
+        # 设置阿里云日志服务的访问信息
+        accessKeyId = 'LTAIWYUujJAm7CbH'
+        accessKey = 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P'
+        if env == "dev":
+            project = 'crawler-log-dev'
+            logstore = 'crawler-log-dev'
+            endpoint = 'cn-hangzhou.log.aliyuncs.com'
+        else:
+            project = 'crawler-log-prod'
+            logstore = 'crawler-log-prod'
+            endpoint = 'cn-hangzhou-intranet.log.aliyuncs.com'
+
+        # 创建 LogClient 实例
+        # print("创建 LogClient 实例")
+        client = LogClient(endpoint, accessKeyId, accessKey)
+
+
+        if '\r' in message:
+            message = message.replace('\r', ' ')
+        if '\n' in message:
+            message = message.replace('\n', ' ')
+        # print(f"message:{message}")
+        log_group = []
+        log_item = LogItem()
+        # print(f"log_item:{type(log_item), log_item}")
+        contents = [(f"{crawler}-{log_type}", message)]
+        # print(f"contents:{type(contents), contents}")
+        log_item.set_contents(contents)
+        log_group.append(log_item)
+        # print(f"log_group:{type(log_group), log_group}")
+
+        # 写入日志
+        # print("开始PutLogsRequest")
+        request = PutLogsRequest(project=project,
+                                 logstore=logstore,
+                                 topic="",
+                                 source="",
+                                 logitems=log_group,
+                                 compress=False)
+        # print(f"request:{request}")
+        # print("put_logs...")
+        client.put_logs(request)
+        # print("put_logs...done")
+
     # 清除日志,保留最近 10 个文件
     @classmethod
     def del_logs(cls, log_type, crawler):
@@ -342,4 +400,3 @@ class Common:
 if __name__ == "__main__":
     Common.tunnel_proxies()
     pass
-
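A minimal usage sketch of the new helper (an illustration, not part of the commit): the call signature matches the call sites added throughout this commit, and the `env` argument selects the dev or prod logstore.

```python
# illustration only: invoking the new Aliyun SLS helper; assumes the repo root is on sys.path
from common.common import Common

# env="dev" routes to the crawler-log-dev project/logstore; any other value goes to crawler-log-prod
Common.logging(log_type="recommend", crawler="xigua", env="dev", message="开始抓取 西瓜推荐")
```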

+ 4 - 19
requirements.txt

@@ -1,32 +1,17 @@
-# pip3 install Appium-Python-Client
-Appium_Python_Client==2.10.1
-# 翻墙, pip3 install git+https://github.com/pyatom/pyatom/
+aliyun_python_sdk==2.2.0
+Appium_Python_Client==2.11.0
 atomac==1.2.0
-# pip3 install ffmpeg-python
 ffmpeg==1.4
-# pip3 install loguru
+jieba==0.42.1
 loguru==0.6.0
-# pip3 install lxml
 lxml==4.9.1
-# pip3 install mq_http_sdk, 若您使用的SDK版本为v1.0.0,您需要安装大于等于2.5且小于3.0版本的Python。若您使用的SDK版本大于v1.0.0,您需要安装2.5及以上版本的Python。
 mq_http_sdk==1.0.3
-# sudo pip3 install oss2
 oss2==2.15.0
-# pip3 install psutil
 psutil==5.9.2
-# pip3 install PyExecJS
 PyExecJS==1.5.1
-# pip3 install PyMysql
 PyMySQL==1.0.2
-# pip3 install redis
 redis==4.5.1
-# pip3 install requests
 requests==2.27.1
-# pip3 install selenium
-selenium==4.9.1
-# pip3 install urllib3
+selenium==4.10.0
 urllib3==1.26.9
-# pip3 install jieba
-jieba==0.42.1
-# pip3 install workalendar
 workalendar==17.0.0

+ 19 - 0
xigua/xigua_author/xigua_author_scheduling.py

@@ -574,12 +574,15 @@ class XiguaauthorScheduling:
             offset += 30
             if response.status_code != 200:
                 Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
+                Common.logging(log_type, crawler, env, f"get_videolist_response:{response.text}\n")
                 return
             elif 'data' not in response.text:
                 Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
+                Common.logging(log_type, crawler, env, f"get_videolist_response:{response.text}\n")
                 return
             elif not response.json()["data"]['videoList']:
                 Common.logger(log_type, crawler).warning(f"没有更多数据啦~:{response.json()}\n")
+                Common.logging(log_type, crawler, env, f"没有更多数据啦~:{response.json()}\n")
                 return
             feeds = response.json()['data']['videoList']
             for i in range(len(feeds)):
@@ -587,19 +590,24 @@ class XiguaauthorScheduling:
                     item_id = feeds[i].get("item_id", "")
                     if item_id == "":
                         Common.logger(log_type, crawler).info("无效视频\n")
+                        Common.logging(log_type, crawler, env, "无效视频\n")
                         continue

                     video_dict = cls.get_video_info(log_type, crawler, item_id)
                     if video_dict is None:
                         Common.logger(log_type, crawler).info("无效视频\n")
+                        Common.logging(log_type, crawler, env, "无效视频\n")
                         continue
                     for k, v in video_dict.items():
                         Common.logger(log_type, crawler).info(f"{k}:{v}")
+                    Common.logging(log_type, crawler, env, f"{video_dict}")
                     if int((int(time.time()) - int(video_dict["publish_time_stamp"])) / (3600 * 24)) > int(rule_dict.get("period", {}).get("max", 1000)):
                         Common.logger(log_type, crawler).info(f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n')
+                        Common.logging(log_type, crawler, env, f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n')
                         return
                     if download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                         Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                        Common.logging(log_type, crawler, env, "不满足抓取规则\n")
                     elif any(str(word) if str(word) in video_dict["video_title"] else False
                              for word in get_config_from_mysql(log_type=log_type,
                                                                source=crawler,
@@ -607,8 +615,10 @@ class XiguaauthorScheduling:
                                                                text="filter",
                                                                action="")) is True:
                         Common.logger(log_type, crawler).info('已中过滤词\n')
+                        Common.logging(log_type, crawler, env, "已中过滤词\n")
                     elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
                         Common.logger(log_type, crawler).info('视频已下载\n')
+                        Common.logging(log_type, crawler, env, "视频已下载\n")
                     else:
                         cls.download_publish(log_type=log_type,
                                              crawler=crawler,
@@ -618,6 +628,7 @@ class XiguaauthorScheduling:
                                              env=env)
                 except Exception as e:
                     Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                    Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, env):
         sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
@@ -639,11 +650,13 @@ class XiguaauthorScheduling:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'], url=video_dict['cover_url'])
@@ -652,6 +665,7 @@ class XiguaauthorScheduling:
 
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         if env == "dev":
             oss_endpoint = "out"
             our_video_id = Publish.upload_and_publish(log_type=log_type,
@@ -712,8 +726,10 @@ class XiguaauthorScheduling:
                         {int(video_dict['video_width'])},
                         {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息写入数据库成功')
+        Common.logging(log_type, crawler, env, '视频信息写入数据库成功')

         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "e075e9", "ROWS", 1, 2)
@@ -740,12 +756,14 @@ class XiguaauthorScheduling:
         time.sleep(0.5)
         Feishu.update_values(log_type, crawler, "e075e9", "F2:Z2", values)
         Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
+        Common.logging(log_type, crawler, env, f"视频已保存至云文档\n")

     @classmethod
     def get_author_videos(cls, log_type, crawler, user_list, rule_dict, env):
         for user_dict in user_list:
             try:
                 Common.logger(log_type, crawler).info(f"开始抓取 {user_dict['nick_name']} 用户主页视频\n")
+                Common.logging(log_type, crawler, env, f"开始抓取 {user_dict['nick_name']} 用户主页视频\n")
                 cls.get_videoList(log_type=log_type,
                                   crawler=crawler,
                                   user_dict=user_dict,
@@ -753,6 +771,7 @@ class XiguaauthorScheduling:
                                   env=env)
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"抓取{user_dict['nick_name']}视频时异常:{e}\n")
+                Common.logging(log_type, crawler, env, f"抓取{user_dict['nick_name']}视频时异常:{e}\n")


 if __name__ == '__main__':

+ 20 - 0
xigua/xigua_main/run_xg_author.py

@@ -23,6 +23,10 @@ def main(log_type, crawler, topic_name, group_id, env):
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                           f'WaitSeconds:{wait_seconds}\n'
+                                           f'TopicName:{topic_name}\n'
+                                           f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -38,6 +42,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                       f"MessageId:{msg.message_id}\n"
+                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                       f"MessageTag:{msg.message_tag}\n"
+                                                       f"ConsumedTimes:{msg.consumed_times}\n"
+                                                       f"PublishTime:{msg.publish_time}\n"
+                                                       f"Body:{msg.message_body}\n"
+                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                       f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)

@@ -48,9 +62,12 @@ def main(log_type, crawler, topic_name, group_id, env):
                 select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
                 user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
                 XiguaauthorScheduling.get_author_videos(log_type=log_type,
                                                         crawler=crawler,
                                                         rule_dict=rule_dict,
@@ -58,14 +75,17 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                         env=env)
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')

         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue

             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue


+ 20 - 0
xigua/xigua_main/run_xg_recommend.py

@@ -24,6 +24,10 @@ def main(log_type, crawler, topic_name, group_id, env):
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                          f'WaitSeconds:{wait_seconds}\n'
+                                          f'TopicName:{topic_name}\n'
+                                          f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -39,6 +43,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                      f"MessageId:{msg.message_id}\n"
+                                                      f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                      f"MessageTag:{msg.message_tag}\n"
+                                                      f"ConsumedTimes:{msg.consumed_times}\n"
+                                                      f"PublishTime:{msg.publish_time}\n"
+                                                      f"Body:{msg.message_body}\n"
+                                                      f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                      f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                      f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)

@@ -53,9 +67,12 @@ def main(log_type, crawler, topic_name, group_id, env):
                     our_uid_list.append(user["uid"])
                 our_uid = random.choice(our_uid_list)
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
                 XiguarecommendScheduling.get_videoList(log_type=log_type,
                                                        crawler=crawler,
                                                        rule_dict=rule_dict,
@@ -63,14 +80,17 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                        env=env)
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')

         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue

             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue


+ 24 - 0
xigua/xigua_main/run_xg_recommend_dev.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/6/12
+import os
+import sys
+sys.path.append(os.getcwd())
+from common.common import Common
+from xigua.xigua_recommend.xigua_recommend_scheduling import XiguarecommendScheduling
+
+
+def main(log_type, crawler, env):
+    Common.logger(log_type, crawler).info(f'开始抓取 西瓜推荐\n')
+    Common.logging(log_type, crawler, env, "开始抓取 西瓜推荐\n")
+    XiguarecommendScheduling.get_videoList(log_type=log_type,
+                                           crawler=crawler,
+                                           rule_dict={},
+                                           our_uid=6267140,
+                                           env=env)
+    Common.logger(log_type, crawler).info("抓取一轮结束\n")
+    Common.logging(log_type, crawler, env, "抓取一轮结束\n")
+
+
+if __name__ == "__main__":
+    main("recommend", "xigua", "dev")
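A usage sketch for the new dev entry point (an illustration, not part of the commit); it assumes the script is launched from the repository root so that `sys.path.append(os.getcwd())` can resolve the `common` and `xigua` packages:

```commandline
cd ./piaoquan_crawler && python3 xigua/xigua_main/run_xg_recommend_dev.py
```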

+ 20 - 0
xigua/xigua_main/run_xg_search.py

@@ -23,6 +23,10 @@ def main(log_type, crawler, topic_name, group_id, env):
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                           f'WaitSeconds:{wait_seconds}\n'
+                                           f'TopicName:{topic_name}\n'
+                                           f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -38,6 +42,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                       f"MessageId:{msg.message_id}\n"
+                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                       f"MessageTag:{msg.message_tag}\n"
+                                                       f"ConsumedTimes:{msg.consumed_times}\n"
+                                                       f"PublishTime:{msg.publish_time}\n"
+                                                       f"Body:{msg.message_body}\n"
+                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                       f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)

@@ -48,9 +62,12 @@ def main(log_type, crawler, topic_name, group_id, env):
                 select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
                 user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
                 XiguasearchScheduling.get_search_videos(log_type=log_type,
                                                         crawler=crawler,
                                                         rule_dict=rule_dict,
@@ -60,14 +77,17 @@ def main(log_type, crawler, topic_name, group_id, env):
                 os.system("ps aux | grep chromedriver | grep -v grep | awk '{print $2}' | xargs kill -9")
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')

         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue

             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue


+ 21 - 0
xigua/xigua_recommend/xigua_recommend_scheduling.py

@@ -605,10 +605,12 @@ class XiguarecommendScheduling:
         queryCount = 1
         while True:
             Common.logger(log_type, crawler).info(f"正在抓取第{queryCount}页视频")
+            Common.logging(log_type, crawler, env, f"正在抓取第{queryCount}页视频")
             try:
                 signature = cls.get_signature(env)
                 if signature is None:
                     Common.logger(log_type, crawler).warning(f"signature:{signature}")
+                    Common.logging(log_type, crawler, env, f"signature:{signature}")
                     time.sleep(1)
                     continue
                 url = "https://www.ixigua.com/api/feedv2/feedById?"
@@ -654,18 +656,23 @@ class XiguarecommendScheduling:
                 queryCount += 1
                 if response.status_code != 200:
                     Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
+                    Common.logging(log_type, crawler, env, f"get_videolist_response:{response.text}\n")
                     return
                 elif 'data' not in response.text:
                     Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
+                    Common.logging(log_type, crawler, env, f"get_videolist_response:{response.text}\n")
                     return
                 elif 'channelFeed' not in response.json()['data']:
                     Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
+                    Common.logging(log_type, crawler, env, f"get_videolist_response:{response.json()}\n")
                     return
                 elif 'Data' not in response.json()['data']['channelFeed']:
                     Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
+                    Common.logging(log_type, crawler, env, f"get_videolist_response:{response.json()}\n")
                     return
                 elif len(response.json()['data']['channelFeed']['Data']) == 0:
                     Common.logger(log_type, crawler).warning(f"没有更多数据啦 ~ :{response.json()}\n")
+                    Common.logging(log_type, crawler, env, f"没有更多数据啦 ~ :{response.json()}\n")
                     return
                 else:
                     feeds = response.json()['data']['channelFeed']['Data']
@@ -674,15 +681,19 @@ class XiguarecommendScheduling:
                             item_id = feeds[i].get("data", {}).get("item_id", "")
                             if item_id == "":
                                 Common.logger(log_type, crawler).info("无效视频\n")
+                                Common.logging(log_type, crawler, env, "无效视频\n")
                                 continue
                             video_dict = cls.get_video_info(log_type, crawler, item_id)
                             if video_dict is None:
                                 Common.logger(log_type, crawler).info("无效视频\n")
+                                Common.logging(log_type, crawler, env, "无效视频\n")
                                 continue
                             for k, v in video_dict.items():
                                 Common.logger(log_type, crawler).info(f"{k}:{v}")
+                            Common.logging(log_type, crawler, env, f"{video_dict}")
                             if download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                                 Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                                Common.logging(log_type, crawler, env, "不满足抓取规则\n")
                             elif any(str(word) if str(word) in video_dict["video_title"] else False
                                      for word in get_config_from_mysql(log_type=log_type,
                                                                        source=crawler,
@@ -690,8 +701,10 @@ class XiguarecommendScheduling:
                                                                        text="filter",
                                                                        action="")) is True:
                                 Common.logger(log_type, crawler).info('已中过滤词\n')
+                                Common.logging(log_type, crawler, env, "已中过滤词\n")
                             elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
                                 Common.logger(log_type, crawler).info('视频已下载\n')
+                                Common.logging(log_type, crawler, env, "视频已下载\n")
                             else:
                                 cls.download_publish(log_type=log_type,
                                                      crawler=crawler,
@@ -701,8 +714,10 @@ class XiguarecommendScheduling:
                                                      env=env)
                         except Exception as e:
                             Common.logger(log_type, crawler).error(f"抓取单条视频时异常:{e}\n")
+                            Common.logging(log_type, crawler, env, f"抓取单条视频时异常:{e}\n")
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"抓取第{queryCount}页时异常:{e}\n")
+                Common.logging(log_type, crawler, env, f"抓取第{queryCount}页时异常:{e}\n")

     @classmethod
     def download_publish(cls, log_type, crawler, our_uid, video_dict, rule_dict, env):
@@ -718,11 +733,13 @@ class XiguarecommendScheduling:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'], url=video_dict['cover_url'])
@@ -731,6 +748,7 @@ class XiguarecommendScheduling:
 
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         if env == "dev":
             oss_endpoint = "out"
             our_video_id = Publish.upload_and_publish(log_type=log_type,
@@ -791,8 +809,10 @@ class XiguarecommendScheduling:
                                         {int(video_dict['video_width'])},
                                         {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
         Common.logger(log_type, crawler).info('视频信息写入数据库成功')
+        Common.logging(log_type, crawler, env, f"视频信息写入数据库成功")

         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "1iKGF1", "ROWS", 1, 2)
@@ -819,6 +839,7 @@ class XiguarecommendScheduling:
         time.sleep(1)
         Feishu.update_values(log_type, 'xigua', "1iKGF1", "F2:Z2", values)
         Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
+        Common.logging(log_type, crawler, env, f"视频已保存至云文档\n")


 if __name__ == "__main__":

+ 20 - 0
xigua/xigua_search/xigua_search_scheduling.py

@@ -565,6 +565,7 @@ class XiguasearchScheduling:
         driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(chromedriver))
         driver.implicitly_wait(10)
         Common.logger(log_type, crawler).info(f"打开搜索页:{user_dict['link']}")
+        Common.logging(log_type, crawler, env, f"打开搜索页:{user_dict['link']}")
         driver.get(f"https://www.ixigua.com/search/{user_dict['link']}/")
         time.sleep(3)
         # driver.get_screenshot_as_file(f"./{crawler}/logs/打开搜索页.jpg")
@@ -594,20 +595,24 @@ class XiguasearchScheduling:
             video_element_temp = video_elements[index:]
             if len(video_element_temp) == 0:
                 Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
+                Common.logging(log_type, crawler, env, '到底啦~~~~~~~~~~~~~\n')
                 driver.quit()
                 return
             for i, video_element in enumerate(video_element_temp):
                 try:
                     if cls.download_cnt >= int(rule_dict.get("videos_cnt", {}).get("min", 30)):
                         Common.logger(log_type, crawler).info(f"搜索词: {user_dict['link']},已下载视频数: {cls.download_cnt}\n")
+                        Common.logging(log_type, crawler, env, f"搜索词: {user_dict['link']},已下载视频数: {cls.download_cnt}\n")
                         driver.quit()
                         return
                     if video_element is None:
                         Common.logger(log_type, crawler).info('到底啦~\n')
+                        Common.logging(log_type, crawler, env, '到底啦~\n')
                         driver.quit()
                         return
                     num += 1
                     Common.logger(log_type, crawler).info(f'拖动"视频"列表第{num}个至屏幕中间')
+                    Common.logging(log_type, crawler, env, f'拖动"视频"列表第{num}个至屏幕中间')
                     driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})", video_element)
                     time.sleep(3)
                     # driver.get_screenshot_as_file(f"./{crawler}/logs/{num}.jpg")
@@ -616,15 +621,18 @@ class XiguasearchScheduling:
                     video_dict = cls.get_video_info(log_type, crawler, item_id)
                     if video_dict is None:
                         Common.logger(log_type, crawler).info("无效视频\n")
+                        Common.logging(log_type, crawler, env, "无效视频\n")
                         continue
                     for k, v in video_dict.items():
                         Common.logger(log_type, crawler).info(f"{k}:{v}")
+                    Common.logging(log_type, crawler, env, f"{video_dict}")
                     # if int((int(time.time()) - int(video_dict["publish_time_stamp"])) / (3600 * 24)) > int(rule_dict.get("period", {}).get("max", 1000)):
                     #     Common.logger(log_type, crawler).info(f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n')
                     #     driver.quit()
                     #     return
                     if download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                         Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                        Common.logging(log_type, crawler, env, "不满足抓取规则\n")
                     elif any(str(word) if str(word) in video_dict["video_title"] else False
                              for word in get_config_from_mysql(log_type=log_type,
                                                                source=crawler,
@@ -632,8 +640,10 @@ class XiguasearchScheduling:
                                                                text="filter",
                                                                action="")) is True:
                         Common.logger(log_type, crawler).info('已中过滤词\n')
+                        Common.logging(log_type, crawler, env, '已中过滤词\n')
                     elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
                         Common.logger(log_type, crawler).info('视频已下载\n')
+                        Common.logging(log_type, crawler, env, '视频已下载\n')
                     else:
                         cls.download_publish(log_type=log_type,
                                              crawler=crawler,
@@ -643,8 +653,10 @@ class XiguasearchScheduling:
                                              env=env)
                 except Exception as e:
                     Common.logger(log_type, crawler).warning(f"抓取单条视频异常:{e}\n")
+                    Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")

             Common.logger(log_type, crawler).info('已抓取完一组视频,休眠10秒\n')
+            Common.logging(log_type, crawler, env, '已抓取完一组视频,休眠10秒\n')
             time.sleep(10)
             index = index + len(video_element_temp)

@@ -672,11 +684,13 @@ class XiguasearchScheduling:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text='cover',
@@ -686,6 +700,7 @@ class XiguasearchScheduling:
 
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         if env == "dev":
             oss_endpoint = "out"
             our_video_id = Publish.upload_and_publish(log_type=log_type,
@@ -746,9 +761,11 @@ class XiguasearchScheduling:
                                 {int(video_dict['video_width'])},
                                 {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
         cls.download_cnt += 1
         Common.logger(log_type, crawler).info("视频信息写入数据库完成")
+        Common.logging(log_type, crawler, env, "视频信息写入数据库完成")

         # 视频信息写入飞书
         Feishu.insert_columns(log_type, crawler, "BUNvGC", "ROWS", 1, 2)
@@ -775,6 +792,7 @@ class XiguasearchScheduling:
         time.sleep(0.5)
         Feishu.update_values(log_type, crawler, "BUNvGC", "E2:Z2", values)
         Common.logger(log_type, crawler).info('视频信息写入飞书完成\n')
+        Common.logging(log_type, crawler, env, '视频信息写入飞书完成\n')

     @classmethod
     def get_search_videos(cls, log_type, crawler, user_list, rule_dict, env):
@@ -782,6 +800,7 @@ class XiguasearchScheduling:
             try:
                 cls.download_cnt = 0
                 Common.logger(log_type, crawler).info(f"开始抓取 {user_dict['link']} 视频\n")
+                Common.logging(log_type, crawler, env, f"开始抓取 {user_dict['link']} 视频\n")
                 cls.get_videoList(log_type=log_type,
                                   crawler=crawler,
                                   user_dict=user_dict,
@@ -789,6 +808,7 @@ class XiguasearchScheduling:
                                   env=env)
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"抓取{user_dict['link']}视频时异常:{e}\n")
+                Common.logging(log_type, crawler, env, f"抓取{user_dict['link']}视频时异常:{e}\n")


 if __name__ == '__main__':