wangkun 1 year ago
parent
commit
7bf2b06d74

+ 1 - 0
README.MD

@@ -200,5 +200,6 @@ jieba==0.42.1
 # pip3 install workalendar
 workalendar==17.0.0
 # pip3 install aliyun_python_sdk
+# pip3 install -U aliyun-log-python-sdk
 aliyun_python_sdk==2.2.0
 ```
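This new dependency presumably supplies the `aliyun.log` package used by `common/common.py` in the next hunk. A minimal import check, assuming the install above succeeded:

```python
# Sanity check: these are the entry points the logging code below relies on.
from aliyun.log import LogClient, LogItem, PutLogsRequest
```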

+ 13 - 12
common/common.py

@@ -42,7 +42,9 @@ class Common:
 
         # Log file name
         # log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + f'-{crawler}-{log_type}.log'
-        log_name = str(date.today()) + f"-{crawler}-{log_type}.log"
+        # log_name = datetime.datetime.now().strftime('%Y-%m-%d') + f'-{crawler}-{log_type}.log'
+        log_name = datetime.datetime.now().strftime('%Y-%m-%d') + '-' + crawler + '-' + log_type + '.log'
+        # log_name = str(date.today()) + f"-{crawler}-{log_type}.log"
 
         # Do not print logs to the console
         logger.remove(handler_id=None)
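Note that the old `str(date.today())` and the new `datetime.datetime.now().strftime('%Y-%m-%d')` produce the same `YYYY-MM-DD` stamp; the switch appears to be purely for consistency with the `datetime`-based calls used elsewhere in this module.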
@@ -82,7 +84,6 @@ class Common:
             endpoint = 'cn-hangzhou-intranet.log.aliyuncs.com'
 
         # Create a LogClient instance
-        # print("创建 LogClient 实例")
         client = LogClient(endpoint, accessKeyId, accessKey)
 
 
@@ -90,28 +91,28 @@ class Common:
             message = message.replace('\r', ' ')
         if '\n' in message:
             message = message.replace('\n', ' ')
-        # print(f"message:{message}")
         log_group = []
         log_item = LogItem()
-        # print(f"log_item:{type(log_item), log_item}")
-        contents = [(f"{crawler}-{log_type}", message)]
-        # print(f"contents:{type(contents), contents}")
+
+        """
+        Build the log message body, for example:
+        crawler:xigua
+        message:does not meet the crawl rules
+        mode:search
+        timestamp:1686656143
+        """
+        contents = [("crawler", str(crawler)), ("mode", str(log_type)), ("message", str(message)), ("timestamp", str(int(time.time())))]
         log_item.set_contents(contents)
         log_group.append(log_item)
-        # print(f"log_group:{type(log_group), log_group}")
 
         # Write the logs
-        # print("开始PutLogsRequest")
         request = PutLogsRequest(project=project,
                                  logstore=logstore,
                                  topic="",
                                  source="",
                                  logitems=log_group,
                                  compress=False)
-        # print(f"request:{request}")
-        # print("put_logs...")
         client.put_logs(request)
-        # print("put_logs...done")
 
     # Clean up logs, keeping the 10 most recent files
     @classmethod
@@ -398,5 +399,5 @@ class Common:
 
 
 if __name__ == "__main__":
-    Common.tunnel_proxies()
+
     pass
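For reference, a self-contained sketch of the structured write that `Common.logging` now performs, assuming the standard `aliyun.log` entry points; the endpoint, project, logstore, and credentials below are placeholders, not values from this repo:

```python
# -*- coding: utf-8 -*-
import time

from aliyun.log import LogClient, LogItem, PutLogsRequest

# Placeholders: substitute a real endpoint and real credentials.
client = LogClient('cn-hangzhou.log.aliyuncs.com', '<accessKeyId>', '<accessKey>')

# One (key, value) pair per field, mirroring the crawler/mode/message/timestamp
# body built above; set_contents expects string values.
log_item = LogItem()
log_item.set_contents([
    ("crawler", "xigua"),
    ("mode", "search"),
    ("message", "does not meet the crawl rules"),
    ("timestamp", str(int(time.time()))),
])

request = PutLogsRequest(project='<project>',
                         logstore='<logstore>',
                         topic="",
                         source="",
                         logitems=[log_item],
                         compress=False)
client.put_logs(request)
```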

+ 25 - 0
xigua/xigua_main/run_xg_search_dev.py

@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/6/13
+import os
+import sys
+sys.path.append(os.getcwd())
+from common.common import Common
+from xigua.xigua_search.xigua_search_scheduling import XiguasearchScheduling
+
+
+def xigua_search_main(log_type, crawler, env):
+    Common.logger(log_type, crawler).info("开始抓取西瓜搜索\n")
+    Common.logging(log_type, crawler, env, "开始抓取西瓜搜索\n")
+    XiguasearchScheduling.get_search_videos(log_type=log_type,
+                                            crawler=crawler,
+                                            rule_dict={"play_cnt":{"min":8000,"max":0},"duration":{"min":60,"max":600},"period":{"min":365,"max":365},"videos_cnt":{"min":30,"max":0}},
+                                            user_list=[{"uid": 6267140, "source": "xigua", "link": "退休补贴", "nick_name": "西瓜搜索测试账号", "avatar_url": "http://rescdn.yishihui.com/user/default/avatar/live/1616555578819_u=1922778943,2660693611&fm=26&gp=0.jpg", "mode": "search"}],
+                                            env=env)
+    Common.del_logs(log_type, crawler)
+    Common.logger(log_type, crawler).info("抓取一轮结束\n")
+    Common.logging(log_type, crawler, env, "抓取一轮结束\n")
+
+
+if __name__ == "__main__":
+    xigua_search_main("search", "xigua", "dev")
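Since the script only appends `os.getcwd()` to `sys.path`, it presumably has to be launched from the repository root (e.g. `python3 xigua/xigua_main/run_xg_search_dev.py`) so that `common` and `xigua` resolve as packages.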

+ 13 - 4
xigua/xigua_search/xigua_search_scheduling.py

@@ -22,7 +22,7 @@ from common.scheduling_db import MysqlHelper
 from common.common import Common
 from common.feishu import Feishu
 from common.publish import Publish
-from common.public import get_config_from_mysql, download_rule
+from common.public import get_config_from_mysql, download_rule, get_title_score
 from common.userAgent import get_random_user_agent
 
 
@@ -645,11 +645,19 @@ class XiguasearchScheduling:
                         Common.logger(log_type, crawler).info('视频已下载\n')
                         Common.logging(log_type, crawler, env, '视频已下载\n')
                     else:
+                        title_score = get_title_score(log_type, "kuaishou", "16QspO", "0usaDk", video_dict["video_title"])
+                        if title_score <= 0.3:
+                            Common.logger(log_type, crawler).info(f"权重分:{title_score}<=0.3\n")
+                            Common.logging(log_type, crawler, env, f"权重分:{title_score}<=0.3\n")
+                            continue
+                        Common.logger(log_type, crawler).info(f"权重分:{title_score}>0.3\n")
+                        Common.logging(log_type, crawler, env, f"权重分:{title_score}>0.3\n")
                         cls.download_publish(log_type=log_type,
                                              crawler=crawler,
                                              user_dict=user_dict,
                                              video_dict=video_dict,
                                              rule_dict=rule_dict,
+                                             title_score=title_score,
                                              env=env)
                 except Exception as e:
                     Common.logger(log_type, crawler).warning(f"抓取单条视频异常:{e}\n")
@@ -668,7 +676,7 @@ class XiguasearchScheduling:
 
     # Download / upload
     @classmethod
-    def download_publish(cls, log_type, crawler, user_dict, video_dict, rule_dict, env):
+    def download_publish(cls, log_type, crawler, user_dict, video_dict, rule_dict, title_score, env):
 
         Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
                                title=video_dict['video_title'], url=video_dict['video_url'])
@@ -769,7 +777,8 @@ class XiguasearchScheduling:
 
         # Write video info to Feishu
         Feishu.insert_columns(log_type, crawler, "BUNvGC", "ROWS", 1, 2)
-        values = [[user_dict["link"],
+        values = [[title_score,
+            user_dict["link"],
             time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
             "关键词搜索",
             video_dict['video_title'],
@@ -790,7 +799,7 @@ class XiguasearchScheduling:
             video_dict['video_url'],
             video_dict['audio_url']]]
         time.sleep(0.5)
-        Feishu.update_values(log_type, crawler, "BUNvGC", "E2:Z2", values)
+        Feishu.update_values(log_type, crawler, "BUNvGC", "D2:Z2", values)
         Common.logger(log_type, crawler).info('视频信息写入飞书完成\n')
         Common.logging(log_type, crawler, env, '视频信息写入飞书完成\n')
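Two details worth noting in this last hunk: the new gate drops any search result whose title weight score is at or below 0.3 before `download_publish` runs (the sheet IDs passed to `get_title_score` reference the kuaishou Feishu document, apparently reused across crawlers), and the write range widens from `E2:Z2` to `D2:Z2` because `values` now carries `title_score` as an extra leading column. A minimal sketch of the gate, assuming the 0.3 cutoff stays hard-coded as above:

```python
def passes_title_gate(title_score: float, threshold: float = 0.3) -> bool:
    """Mirror of the new pre-download check: only titles whose weight
    score strictly exceeds the threshold proceed to download_publish."""
    return title_score > threshold


# A score of exactly 0.3 is rejected, matching the `<= 0.3` skip above.
assert passes_title_gate(0.31)
assert not passes_title_gate(0.3)
```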