wangkun 1 year ago
parent
commit
08f0f609a9

+ 40 - 0
README.MD

@@ -161,4 +161,44 @@ ps aux | grep xigua | grep -v grep | awk '{print $2}' | xargs kill -9
 ps aux | grep kuaishou | grep -v grep | awk '{print $2}' | xargs kill -9
 ps aux | grep douyin | grep -v grep | awk '{print $2}' | xargs kill -9
 ps aux | grep xiaoniangao | grep -v grep | awk '{print $2}' | xargs kill -9
+```
+
+#### 生成 requirements.txt
+```commandline
+cd ./piaoquan_crawler && pipreqs ./ --force
+
+# pip3 install Appium-Python-Client
+Appium_Python_Client==2.10.1
+# 翻墙, pip3 install git+https://github.com/pyatom/pyatom/
+atomac==1.2.0
+# pip3 install ffmpeg-python
+ffmpeg==1.4
+# pip3 install loguru
+loguru==0.6.0
+# pip3 install lxml
+lxml==4.9.1
+# pip3 install mq_http_sdk, 若您使用的SDK版本为v1.0.0,您需要安装大于等于2.5且小于3.0版本的Python。若您使用的SDK版本大于v1.0.0,您需要安装2.5及以上版本的Python。
+mq_http_sdk==1.0.3
+# sudo pip3 install oss2
+oss2==2.15.0
+# pip3 install psutil
+psutil==5.9.2
+# pip3 install PyExecJS
+PyExecJS==1.5.1
+# pip3 install PyMysql
+PyMySQL==1.0.2
+# pip3 install redis
+redis==4.5.1
+# pip3 install requests
+requests==2.27.1
+# pip3 install selenium
+selenium==4.9.1
+# pip3 install urllib3
+urllib3==1.26.9
+# pip3 install jieba
+jieba==0.42.1
+# pip3 install workalendar
+workalendar==17.0.0
+# pip3 install aliyun_python_sdk
+aliyun_python_sdk==2.2.0
 ```
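The section above lists the project's pinned dependencies package by package; as a follow-up sketch (an assumption, not part of the original README), the whole set can be installed in one step from requirements.txt:

```commandline
# assumption: install every pinned dependency at once after regenerating requirements.txt
cd ./piaoquan_crawler && pip3 install -r requirements.txt
```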

+ 58 - 1
common/common.py

@@ -4,6 +4,7 @@
 """
 公共方法,包含:生成log / 删除log / 下载方法 / 删除 weixinzhishu_chlsfiles / 过滤词库 / 保存视频信息至本地 txt / 翻译 / ffmpeg
 """
+from aliyun.log import LogClient, PutLogsRequest, LogItem
 from datetime import date, timedelta
 from loguru import logger
 from hashlib import md5
@@ -55,6 +56,63 @@ class Common:
 
 
         return logger

+    # 写入阿里云日志
+    @staticmethod
+    def logging(log_type, crawler, env, message):
+        """
+        写入阿里云日志
+        测试库: https://sls.console.aliyun.com/lognext/project/crawler-log-dev/logsearch/crawler-log-dev
+        正式库: https://sls.console.aliyun.com/lognext/project/crawler-log-prod/logsearch/crawler-log-prod
+        :param log_type: 爬虫策略
+        :param crawler: 哪款爬虫
+        :param env: 环境
+        :param message:日志内容
+        :return: None
+        """
+        # 设置阿里云日志服务的访问信息
+        accessKeyId = 'LTAIWYUujJAm7CbH'
+        accessKey = 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P'
+        if env == "dev":
+            project = 'crawler-log-dev'
+            logstore = 'crawler-log-dev'
+            endpoint = 'cn-hangzhou.log.aliyuncs.com'
+        else:
+            project = 'crawler-log-prod'
+            logstore = 'crawler-log-prod'
+            endpoint = 'cn-hangzhou-intranet.log.aliyuncs.com'
+
+        # 创建 LogClient 实例
+        # print("创建 LogClient 实例")
+        client = LogClient(endpoint, accessKeyId, accessKey)
+
+
+        if '\r' in message:
+            message = message.replace('\r', ' ')
+        if '\n' in message:
+            message = message.replace('\n', ' ')
+        # print(f"message:{message}")
+        log_group = []
+        log_item = LogItem()
+        # print(f"log_item:{type(log_item), log_item}")
+        contents = [(f"{crawler}-{log_type}", message)]
+        # print(f"contents:{type(contents), contents}")
+        log_item.set_contents(contents)
+        log_group.append(log_item)
+        # print(f"log_group:{type(log_group), log_group}")
+
+        # 写入日志
+        # print("开始PutLogsRequest")
+        request = PutLogsRequest(project=project,
+                                 logstore=logstore,
+                                 topic="",
+                                 source="",
+                                 logitems=log_group,
+                                 compress=False)
+        # print(f"request:{request}")
+        # print("put_logs...")
+        client.put_logs(request)
+        # print("put_logs...done")
+
     # 清除日志,保留最近 10 个文件
     @classmethod
     def del_logs(cls, log_type, crawler):
@@ -342,4 +400,3 @@ class Common:
 if __name__ == "__main__":
     Common.tunnel_proxies()
     pass
-
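A minimal usage sketch of the new helper (an illustration, not part of the commit): the call signature matches the call sites added throughout this commit, and the `env` argument selects the dev or prod logstore.

```python
# illustration only: invoking the new Aliyun SLS helper; assumes the repo root is on sys.path
from common.common import Common

# env="dev" routes to the crawler-log-dev project/logstore; any other value goes to crawler-log-prod
Common.logging(log_type="recommend", crawler="xigua", env="dev", message="开始抓取 西瓜推荐")
```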

+ 4 - 19
requirements.txt

@@ -1,32 +1,17 @@
-# pip3 install Appium-Python-Client
-Appium_Python_Client==2.10.1
-# 翻墙, pip3 install git+https://github.com/pyatom/pyatom/
+aliyun_python_sdk==2.2.0
+Appium_Python_Client==2.11.0
 atomac==1.2.0
-# pip3 install ffmpeg-python
 ffmpeg==1.4
-# pip3 install loguru
+jieba==0.42.1
 loguru==0.6.0
-# pip3 install lxml
 lxml==4.9.1
-# pip3 install mq_http_sdk, 若您使用的SDK版本为v1.0.0,您需要安装大于等于2.5且小于3.0版本的Python。若您使用的SDK版本大于v1.0.0,您需要安装2.5及以上版本的Python。
 mq_http_sdk==1.0.3
-# sudo pip3 install oss2
 oss2==2.15.0
-# pip3 install psutil
 psutil==5.9.2
-# pip3 install PyExecJS
 PyExecJS==1.5.1
-# pip3 install PyMysql
 PyMySQL==1.0.2
-# pip3 install redis
 redis==4.5.1
-# pip3 install requests
 requests==2.27.1
-# pip3 install selenium
-selenium==4.9.1
-# pip3 install urllib3
+selenium==4.10.0
 urllib3==1.26.9
-# pip3 install jieba
-jieba==0.42.1
-# pip3 install workalendar
 workalendar==17.0.0

+ 19 - 0
xigua/xigua_author/xigua_author_scheduling.py

@@ -574,12 +574,15 @@ class XiguaauthorScheduling:
             offset += 30
             if response.status_code != 200:
                 Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
+                Common.logging(log_type, crawler, env, f"get_videolist_response:{response.text}\n")
                 return
             elif 'data' not in response.text:
                 Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
+                Common.logging(log_type, crawler, env, f"get_videolist_response:{response.text}\n")
                 return
             elif not response.json()["data"]['videoList']:
                 Common.logger(log_type, crawler).warning(f"没有更多数据啦~:{response.json()}\n")
+                Common.logging(log_type, crawler, env, f"没有更多数据啦~:{response.json()}\n")
                 return
             feeds = response.json()['data']['videoList']
             for i in range(len(feeds)):
@@ -587,19 +590,24 @@ class XiguaauthorScheduling:
                     item_id = feeds[i].get("item_id", "")
                     if item_id == "":
                         Common.logger(log_type, crawler).info("无效视频\n")
+                        Common.logging(log_type, crawler, env, "无效视频\n")
                         continue

                     video_dict = cls.get_video_info(log_type, crawler, item_id)
                     if video_dict is None:
                         Common.logger(log_type, crawler).info("无效视频\n")
+                        Common.logging(log_type, crawler, env, "无效视频\n")
                         continue
                     for k, v in video_dict.items():
                         Common.logger(log_type, crawler).info(f"{k}:{v}")
+                    Common.logging(log_type, crawler, env, f"{video_dict}")
                     if int((int(time.time()) - int(video_dict["publish_time_stamp"])) / (3600 * 24)) > int(rule_dict.get("period", {}).get("max", 1000)):
                         Common.logger(log_type, crawler).info(f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n')
+                        Common.logging(log_type, crawler, env, f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n')
                         return
                     if download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                         Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                        Common.logging(log_type, crawler, env, "不满足抓取规则\n")
                     elif any(str(word) if str(word) in video_dict["video_title"] else False
                              for word in get_config_from_mysql(log_type=log_type,
                                                                source=crawler,
@@ -607,8 +615,10 @@ class XiguaauthorScheduling:
                                                                text="filter",
                                                                action="")) is True:
                         Common.logger(log_type, crawler).info('已中过滤词\n')
+                        Common.logging(log_type, crawler, env, "已中过滤词\n")
                     elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
                         Common.logger(log_type, crawler).info('视频已下载\n')
+                        Common.logging(log_type, crawler, env, "视频已下载\n")
                     else:
                         cls.download_publish(log_type=log_type,
                                              crawler=crawler,
@@ -618,6 +628,7 @@ class XiguaauthorScheduling:
                                              env=env)
                 except Exception as e:
                     Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                    Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, env):
         sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
@@ -639,11 +650,13 @@ class XiguaauthorScheduling:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'], url=video_dict['cover_url'])
@@ -652,6 +665,7 @@ class XiguaauthorScheduling:
 
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         if env == "dev":
             oss_endpoint = "out"
             our_video_id = Publish.upload_and_publish(log_type=log_type,
@@ -712,8 +726,10 @@ class XiguaauthorScheduling:
                         {int(video_dict['video_width'])},
                         {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息写入数据库成功')
+        Common.logging(log_type, crawler, env, '视频信息写入数据库成功')

         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "e075e9", "ROWS", 1, 2)
@@ -740,12 +756,14 @@ class XiguaauthorScheduling:
         time.sleep(0.5)
         Feishu.update_values(log_type, crawler, "e075e9", "F2:Z2", values)
         Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
+        Common.logging(log_type, crawler, env, f"视频已保存至云文档\n")

     @classmethod
     def get_author_videos(cls, log_type, crawler, user_list, rule_dict, env):
         for user_dict in user_list:
             try:
                 Common.logger(log_type, crawler).info(f"开始抓取 {user_dict['nick_name']} 用户主页视频\n")
+                Common.logging(log_type, crawler, env, f"开始抓取 {user_dict['nick_name']} 用户主页视频\n")
                 cls.get_videoList(log_type=log_type,
                                   crawler=crawler,
                                   user_dict=user_dict,
@@ -753,6 +771,7 @@ class XiguaauthorScheduling:
                                   env=env)
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"抓取{user_dict['nick_name']}视频时异常:{e}\n")
+                Common.logging(log_type, crawler, env, f"抓取{user_dict['nick_name']}视频时异常:{e}\n")


 if __name__ == '__main__':

+ 20 - 0
xigua/xigua_main/run_xg_author.py

@@ -23,6 +23,10 @@ def main(log_type, crawler, topic_name, group_id, env):
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                           f'WaitSeconds:{wait_seconds}\n'
+                                           f'TopicName:{topic_name}\n'
+                                           f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -38,6 +42,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                       f"MessageId:{msg.message_id}\n"
+                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                       f"MessageTag:{msg.message_tag}\n"
+                                                       f"ConsumedTimes:{msg.consumed_times}\n"
+                                                       f"PublishTime:{msg.publish_time}\n"
+                                                       f"Body:{msg.message_body}\n"
+                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                       f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)

@@ -48,9 +62,12 @@ def main(log_type, crawler, topic_name, group_id, env):
                 select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
                 user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
                 XiguaauthorScheduling.get_author_videos(log_type=log_type,
                                                         crawler=crawler,
                                                         rule_dict=rule_dict,
@@ -58,14 +75,17 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                         env=env)
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')

         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue

             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue


+ 20 - 0
xigua/xigua_main/run_xg_recommend.py

@@ -24,6 +24,10 @@ def main(log_type, crawler, topic_name, group_id, env):
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                          f'WaitSeconds:{wait_seconds}\n'
+                                          f'TopicName:{topic_name}\n'
+                                          f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -39,6 +43,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                      f"MessageId:{msg.message_id}\n"
+                                                      f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                      f"MessageTag:{msg.message_tag}\n"
+                                                      f"ConsumedTimes:{msg.consumed_times}\n"
+                                                      f"PublishTime:{msg.publish_time}\n"
+                                                      f"Body:{msg.message_body}\n"
+                                                      f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                      f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                      f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)

@@ -53,9 +67,12 @@ def main(log_type, crawler, topic_name, group_id, env):
                     our_uid_list.append(user["uid"])
                 our_uid = random.choice(our_uid_list)
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
                 XiguarecommendScheduling.get_videoList(log_type=log_type,
                                                        crawler=crawler,
                                                        rule_dict=rule_dict,
@@ -63,14 +80,17 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                        env=env)
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')

         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue

             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue


+ 24 - 0
xigua/xigua_main/run_xg_recommend_dev.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/6/12
+import os
+import sys
+sys.path.append(os.getcwd())
+from common.common import Common
+from xigua.xigua_recommend.xigua_recommend_scheduling import XiguarecommendScheduling
+
+
+def main(log_type, crawler, env):
+    Common.logger(log_type, crawler).info(f'开始抓取 西瓜推荐\n')
+    Common.logging(log_type, crawler, env, "开始抓取 西瓜推荐\n")
+    XiguarecommendScheduling.get_videoList(log_type=log_type,
+                                           crawler=crawler,
+                                           rule_dict={},
+                                           our_uid=6267140,
+                                           env=env)
+    Common.logger(log_type, crawler).info("抓取一轮结束\n")
+    Common.logging(log_type, crawler, env, "抓取一轮结束\n")
+
+
+if __name__ == "__main__":
+    main("recommend", "xigua", "dev")
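A usage sketch for the new dev entry point (an illustration, not part of the commit); it assumes the script is launched from the repository root so that `sys.path.append(os.getcwd())` can resolve the `common` and `xigua` packages:

```commandline
cd ./piaoquan_crawler && python3 xigua/xigua_main/run_xg_recommend_dev.py
```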

+ 20 - 0
xigua/xigua_main/run_xg_search.py

@@ -23,6 +23,10 @@ def main(log_type, crawler, topic_name, group_id, env):
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                           f'WaitSeconds:{wait_seconds}\n'
+                                           f'TopicName:{topic_name}\n'
+                                           f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -38,6 +42,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                       f"MessageId:{msg.message_id}\n"
+                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                       f"MessageTag:{msg.message_tag}\n"
+                                                       f"ConsumedTimes:{msg.consumed_times}\n"
+                                                       f"PublishTime:{msg.publish_time}\n"
+                                                       f"Body:{msg.message_body}\n"
+                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                       f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)

@@ -48,9 +62,12 @@ def main(log_type, crawler, topic_name, group_id, env):
                 select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
                 user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
                 XiguasearchScheduling.get_search_videos(log_type=log_type,
                                                         crawler=crawler,
                                                         rule_dict=rule_dict,
@@ -60,14 +77,17 @@ def main(log_type, crawler, topic_name, group_id, env):
                 os.system("ps aux | grep chromedriver | grep -v grep | awk '{print $2}' | xargs kill -9")
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')

         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue

             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue


+ 21 - 0
xigua/xigua_recommend/xigua_recommend_scheduling.py

@@ -605,10 +605,12 @@ class XiguarecommendScheduling:
         queryCount = 1
         while True:
             Common.logger(log_type, crawler).info(f"正在抓取第{queryCount}页视频")
+            Common.logging(log_type, crawler, env, f"正在抓取第{queryCount}页视频")
             try:
                 signature = cls.get_signature(env)
                 if signature is None:
                     Common.logger(log_type, crawler).warning(f"signature:{signature}")
+                    Common.logging(log_type, crawler, env, f"signature:{signature}")
                     time.sleep(1)
                     continue
                 url = "https://www.ixigua.com/api/feedv2/feedById?"
@@ -654,18 +656,23 @@ class XiguarecommendScheduling:
                 queryCount += 1
                 if response.status_code != 200:
                     Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
+                    Common.logging(log_type, crawler, env, f"get_videolist_response:{response.text}\n")
                     return
                 elif 'data' not in response.text:
                     Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
+                    Common.logging(log_type, crawler, env, f"get_videolist_response:{response.text}\n")
                     return
                 elif 'channelFeed' not in response.json()['data']:
                     Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
+                    Common.logging(log_type, crawler, env, f"get_videolist_response:{response.json()}\n")
                     return
                 elif 'Data' not in response.json()['data']['channelFeed']:
                     Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
+                    Common.logging(log_type, crawler, env, f"get_videolist_response:{response.json()}\n")
                     return
                 elif len(response.json()['data']['channelFeed']['Data']) == 0:
                     Common.logger(log_type, crawler).warning(f"没有更多数据啦 ~ :{response.json()}\n")
+                    Common.logging(log_type, crawler, env, f"没有更多数据啦 ~ :{response.json()}\n")
                     return
                 else:
                     feeds = response.json()['data']['channelFeed']['Data']
@@ -674,15 +681,19 @@ class XiguarecommendScheduling:
                             item_id = feeds[i].get("data", {}).get("item_id", "")
                             if item_id == "":
                                 Common.logger(log_type, crawler).info("无效视频\n")
+                                Common.logging(log_type, crawler, env, "无效视频\n")
                                 continue
                             video_dict = cls.get_video_info(log_type, crawler, item_id)
                             if video_dict is None:
                                 Common.logger(log_type, crawler).info("无效视频\n")
+                                Common.logging(log_type, crawler, env, "无效视频\n")
                                 continue
                             for k, v in video_dict.items():
                                 Common.logger(log_type, crawler).info(f"{k}:{v}")
+                            Common.logging(log_type, crawler, env, f"{video_dict}")
                             if download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                                 Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                                Common.logging(log_type, crawler, env, "不满足抓取规则\n")
                             elif any(str(word) if str(word) in video_dict["video_title"] else False
                                      for word in get_config_from_mysql(log_type=log_type,
                                                                        source=crawler,
@@ -690,8 +701,10 @@ class XiguarecommendScheduling:
                                                                        text="filter",
                                                                        action="")) is True:
                                 Common.logger(log_type, crawler).info('已中过滤词\n')
+                                Common.logging(log_type, crawler, env, "已中过滤词\n")
                             elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
                                 Common.logger(log_type, crawler).info('视频已下载\n')
+                                Common.logging(log_type, crawler, env, "视频已下载\n")
                             else:
                                 cls.download_publish(log_type=log_type,
                                                      crawler=crawler,
@@ -701,8 +714,10 @@ class XiguarecommendScheduling:
                                                      env=env)
                         except Exception as e:
                             Common.logger(log_type, crawler).error(f"抓取单条视频时异常:{e}\n")
+                            Common.logging(log_type, crawler, env, f"抓取单条视频时异常:{e}\n")
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"抓取第{queryCount}页时异常:{e}\n")
+                Common.logging(log_type, crawler, env, f"抓取第{queryCount}页时异常:{e}\n")

     @classmethod
     def download_publish(cls, log_type, crawler, our_uid, video_dict, rule_dict, env):
@@ -718,11 +733,13 @@ class XiguarecommendScheduling:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'], url=video_dict['cover_url'])
@@ -731,6 +748,7 @@ class XiguarecommendScheduling:
 
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         if env == "dev":
             oss_endpoint = "out"
             our_video_id = Publish.upload_and_publish(log_type=log_type,
@@ -791,8 +809,10 @@ class XiguarecommendScheduling:
                                         {int(video_dict['video_width'])},
                                         {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
         Common.logger(log_type, crawler).info('视频信息写入数据库成功')
+        Common.logging(log_type, crawler, env, f"视频信息写入数据库成功")

         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "1iKGF1", "ROWS", 1, 2)
@@ -819,6 +839,7 @@ class XiguarecommendScheduling:
         time.sleep(1)
         Feishu.update_values(log_type, 'xigua', "1iKGF1", "F2:Z2", values)
         Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
+        Common.logging(log_type, crawler, env, f"视频已保存至云文档\n")


 if __name__ == "__main__":

+ 20 - 0
xigua/xigua_search/xigua_search_scheduling.py

@@ -565,6 +565,7 @@ class XiguasearchScheduling:
         driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(chromedriver))
         driver.implicitly_wait(10)
         Common.logger(log_type, crawler).info(f"打开搜索页:{user_dict['link']}")
+        Common.logging(log_type, crawler, env, f"打开搜索页:{user_dict['link']}")
         driver.get(f"https://www.ixigua.com/search/{user_dict['link']}/")
         time.sleep(3)
         # driver.get_screenshot_as_file(f"./{crawler}/logs/打开搜索页.jpg")
@@ -594,20 +595,24 @@ class XiguasearchScheduling:
             video_element_temp = video_elements[index:]
             if len(video_element_temp) == 0:
                 Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
+                Common.logging(log_type, crawler, env, '到底啦~~~~~~~~~~~~~\n')
                 driver.quit()
                 return
             for i, video_element in enumerate(video_element_temp):
                 try:
                     if cls.download_cnt >= int(rule_dict.get("videos_cnt", {}).get("min", 30)):
                         Common.logger(log_type, crawler).info(f"搜索词: {user_dict['link']},已下载视频数: {cls.download_cnt}\n")
+                        Common.logging(log_type, crawler, env, f"搜索词: {user_dict['link']},已下载视频数: {cls.download_cnt}\n")
                         driver.quit()
                         return
                     if video_element is None:
                         Common.logger(log_type, crawler).info('到底啦~\n')
+                        Common.logging(log_type, crawler, env, '到底啦~\n')
                         driver.quit()
                         return
                     num += 1
                     Common.logger(log_type, crawler).info(f'拖动"视频"列表第{num}个至屏幕中间')
+                    Common.logging(log_type, crawler, env, f'拖动"视频"列表第{num}个至屏幕中间')
                     driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})", video_element)
                     time.sleep(3)
                     # driver.get_screenshot_as_file(f"./{crawler}/logs/{num}.jpg")
@@ -616,15 +621,18 @@ class XiguasearchScheduling:
                     video_dict = cls.get_video_info(log_type, crawler, item_id)
                     if video_dict is None:
                         Common.logger(log_type, crawler).info("无效视频\n")
+                        Common.logging(log_type, crawler, env, "无效视频\n")
                         continue
                     for k, v in video_dict.items():
                         Common.logger(log_type, crawler).info(f"{k}:{v}")
+                    Common.logging(log_type, crawler, env, f"{video_dict}")
                     # if int((int(time.time()) - int(video_dict["publish_time_stamp"])) / (3600 * 24)) > int(rule_dict.get("period", {}).get("max", 1000)):
                     #     Common.logger(log_type, crawler).info(f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n')
                     #     driver.quit()
                     #     return
                     if download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                         Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                        Common.logging(log_type, crawler, env, "不满足抓取规则\n")
                     elif any(str(word) if str(word) in video_dict["video_title"] else False
                              for word in get_config_from_mysql(log_type=log_type,
                                                                source=crawler,
@@ -632,8 +640,10 @@ class XiguasearchScheduling:
                                                                text="filter",
                                                                action="")) is True:
                         Common.logger(log_type, crawler).info('已中过滤词\n')
+                        Common.logging(log_type, crawler, env, '已中过滤词\n')
                     elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
                         Common.logger(log_type, crawler).info('视频已下载\n')
+                        Common.logging(log_type, crawler, env, '视频已下载\n')
                     else:
                         cls.download_publish(log_type=log_type,
                                              crawler=crawler,
@@ -643,8 +653,10 @@ class XiguasearchScheduling:
                                              env=env)
                 except Exception as e:
                     Common.logger(log_type, crawler).warning(f"抓取单条视频异常:{e}\n")
+                    Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")

             Common.logger(log_type, crawler).info('已抓取完一组视频,休眠10秒\n')
+            Common.logging(log_type, crawler, env, '已抓取完一组视频,休眠10秒\n')
             time.sleep(10)
             index = index + len(video_element_temp)

@@ -672,11 +684,13 @@ class XiguasearchScheduling:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text='cover',
@@ -686,6 +700,7 @@ class XiguasearchScheduling:
 
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         if env == "dev":
             oss_endpoint = "out"
             our_video_id = Publish.upload_and_publish(log_type=log_type,
@@ -746,9 +761,11 @@ class XiguasearchScheduling:
                                 {int(video_dict['video_width'])},
                                 {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env, action="")
         cls.download_cnt += 1
         Common.logger(log_type, crawler).info("视频信息写入数据库完成")
+        Common.logging(log_type, crawler, env, "视频信息写入数据库完成")

         # 视频信息写入飞书
         Feishu.insert_columns(log_type, crawler, "BUNvGC", "ROWS", 1, 2)
@@ -775,6 +792,7 @@ class XiguasearchScheduling:
         time.sleep(0.5)
         Feishu.update_values(log_type, crawler, "BUNvGC", "E2:Z2", values)
         Common.logger(log_type, crawler).info('视频信息写入飞书完成\n')
+        Common.logging(log_type, crawler, env, '视频信息写入飞书完成\n')

     @classmethod
     def get_search_videos(cls, log_type, crawler, user_list, rule_dict, env):
@@ -782,6 +800,7 @@ class XiguasearchScheduling:
             try:
                 cls.download_cnt = 0
                 Common.logger(log_type, crawler).info(f"开始抓取 {user_dict['link']} 视频\n")
+                Common.logging(log_type, crawler, env, f"开始抓取 {user_dict['link']} 视频\n")
                 cls.get_videoList(log_type=log_type,
                                   crawler=crawler,
                                   user_dict=user_dict,
@@ -789,6 +808,7 @@ class XiguasearchScheduling:
                                   env=env)
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"抓取{user_dict['link']}视频时异常:{e}\n")
+                Common.logging(log_type, crawler, env, f"抓取{user_dict['link']}视频时异常:{e}\n")


 if __name__ == '__main__':