Browse Source

看一看增加新日志

zhangyong 1 year ago
parent
commit
f4fe1365d4

+ 94 - 2
kanyikan/kanyikan_recommend/kanyikan_recommend_feed.py

@@ -7,7 +7,10 @@ import sys
 import time
 import requests
 import urllib3
+
+
 sys.path.append(os.getcwd())
+from common import AliyunLogger
 from common.mq import MQ
 from common.common import Common
 from common.scheduling_db import MysqlHelper
@@ -59,12 +62,26 @@ class KanyikanRecommend:
         if "data" not in response.text:
             Common.logger(log_type, crawler).info("获取视频list时,session过期,随机睡眠 31-50 秒")
             Common.logging(log_type, crawler, env, "获取视频list时,session过期,随机睡眠 31-50 秒")
+            AliyunLogger.logging(
+                code="2000",
+                platform=crawler,
+                mode=log_type,
+                env=env,
+                message=f"获取视频list时,session过期,随机睡眠 31-50 秒"
+            )
             # 如果返回空信息,则随机睡眠 31-40 秒
             time.sleep(random.randint(31, 40))
             cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
         elif "items" not in response.json()["data"]:
             Common.logger(log_type, crawler).info(f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
             Common.logging(log_type, crawler, env, f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
+            AliyunLogger.logging(
+                code="2000",
+                platform=crawler,
+                mode=log_type,
+                env=env,
+                message=f"get_feeds:{response.json()},随机睡眠 1-3 分钟"
+            )
             # 如果返回空信息,则随机睡眠 1-3 分钟
             time.sleep(random.randint(60, 180))
             cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
@@ -76,8 +93,6 @@ class KanyikanRecommend:
     def get_videoList(cls, log_type, crawler, our_uid, rule_dict, env):
         mq = MQ(topic_name="topic_crawler_etl_" + env)
         try:
-            Common.logger(log_type, crawler).info(f"正在抓取列表页")
-            Common.logging(log_type, crawler, env, f"正在抓取列表页")
             session = Common.get_session(log_type, crawler, env)
             if session is None:
                 time.sleep(1)
@@ -124,12 +139,26 @@ class KanyikanRecommend:
                 if "data" not in response.text:
                     Common.logger(log_type, crawler).info("获取视频list时,session过期,随机睡眠 31-50 秒")
                     Common.logging(log_type, crawler, env, "获取视频list时,session过期,随机睡眠 31-50 秒")
+                    AliyunLogger.logging(
+                        code="2000",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"获取视频list时,session过期,随机睡眠 31-50 秒"
+                    )
                     # 如果返回空信息,则随机睡眠 31-40 秒
                     time.sleep(random.randint(31, 40))
                     cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
                 elif "items" not in response.json()["data"]:
                     Common.logger(log_type, crawler).info(f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
                     Common.logging(log_type, crawler, env, f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
+                    AliyunLogger.logging(
+                        code="2000",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"get_feeds:{response.json()},随机睡眠 1-3 分钟"
+                    )
                     # 如果返回空信息,则随机睡眠 1-3 分钟
                     time.sleep(random.randint(60, 180))
                     cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
@@ -137,9 +166,23 @@ class KanyikanRecommend:
                 if feeds == "":
                     Common.logger(log_type, crawler).info(f"feeds:{feeds}")
                     Common.logging(log_type, crawler, env, f"feeds:{feeds}")
+                    AliyunLogger.logging(
+                        code="2001",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"没有更多视频啦 ~\n"
+                    )
                     return
                 for j in range(len(feeds)):
                     try:
+                        AliyunLogger.logging(
+                            code="1001",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message='扫描到一条视频\n'
+                        )
                         video_title = feeds[j].get("title", "").strip().replace("\n", "") \
                             .replace("/", "").replace("\\", "").replace("\r", "") \
                             .replace(":", "").replace("*", "").replace("?", "") \
@@ -191,15 +234,36 @@ class KanyikanRecommend:
                         for k, v in video_dict.items():
                             Common.logger(log_type, crawler).info(f"{k}:{v}")
                         Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
+                        AliyunLogger.logging(
+                            code="1000",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message=f"{video_dict}\n"
+                        )
 
                         if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict[
                             "video_url"] == "":
                             Common.logger(log_type, crawler).info("无效视频\n")
                             Common.logging(log_type, crawler, env, "无效视频\n")
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='无效视频\n'
+                            )
                         elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict,
                                            rule_dict=rule_dict) is False:
                             Common.logger(log_type, crawler).info("不满足抓取规则\n")
                             Common.logging(log_type, crawler, env, "不满足抓取规则\n")
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='不满足抓取规则\n'
+                            )
                         elif any(str(word) if str(word) in video_dict["video_title"] else False
                                  for word in get_config_from_mysql(log_type=log_type,
                                                                    source=crawler,
@@ -208,9 +272,23 @@ class KanyikanRecommend:
                                                                    action="")) is True:
                             Common.logger(log_type, crawler).info('已中过滤词\n')
                             Common.logging(log_type, crawler, env, '已中过滤词\n')
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='已中过滤词\n'
+                            )
                         elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
                             Common.logger(log_type, crawler).info('视频已下载\n')
                             Common.logging(log_type, crawler, env, '视频已下载\n')
+                            AliyunLogger.logging(
+                                code="2002",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='视频已下载\n'
+                            )
                         else:
                             video_dict["out_user_id"] = video_dict["user_id"]
                             video_dict["platform"] = crawler
@@ -226,9 +304,23 @@ class KanyikanRecommend:
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
                         Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
+                        AliyunLogger.logging(
+                            code="3000",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message=f"抓取单条视频异常:{e}\n"
+                        )
         except Exception as e:
             Common.logger(log_type, crawler).error(f"抓取列表页时异常:{e}\n")
             Common.logging(log_type, crawler, env, f"抓取列表页时异常:{e}\n")
+            AliyunLogger.logging(
+                code="3000",
+                platform=crawler,
+                mode=log_type,
+                env=env,
+                message=f"抓取列表页时异常:{e}\n"
+            )
 
 
 if __name__ == "__main__":

+ 87 - 4
kanyikan/kanyikan_recommend/kanyikan_recommend_plus.py

@@ -7,10 +7,13 @@ import sys
 import time
 import requests
 import urllib3
+
+
 sys.path.append(os.getcwd())
 from common.mq import MQ
 from common.common import Common
 from common.scheduling_db import MysqlHelper
+from common import AliyunLogger
 from common.public import get_config_from_mysql, download_rule
 proxies = {"http": None, "https": None}
 
@@ -29,10 +32,6 @@ class KanyikanRecommend:
     def get_videoList(cls, log_type, crawler, our_uid, rule_dict, env):
         mq = MQ(topic_name="topic_crawler_etl_" + env)
         try:
-
-            Common.logger(log_type, crawler).info(f"正在抓取列表页{crawler}")
-            Common.logging(log_type, crawler, env, f"正在抓取列表页")
-            Common.logger(log_type, crawler).info(f"Test{crawler}")
             session = Common.get_session(log_type, crawler, env)
             if session is None:
                 time.sleep(1)
@@ -103,12 +102,26 @@ class KanyikanRecommend:
                 if "data" not in response.text:
                     Common.logger(log_type, crawler).info("获取视频list时,session过期,随机睡眠 31-50 秒")
                     Common.logging(log_type, crawler, env, "获取视频list时,session过期,随机睡眠 31-50 秒")
+                    AliyunLogger.logging(
+                        code="2000",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"获取视频list时,session过期,随机睡眠 31-50 秒"
+                    )
                     # 如果返回空信息,则随机睡眠 31-40 秒
                     time.sleep(random.randint(31, 40))
                     cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
                 elif "items" not in response.json()["data"]:
                     Common.logger(log_type, crawler).info(f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
                     Common.logging(log_type, crawler, env, f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
+                    AliyunLogger.logging(
+                        code="2000",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"get_feeds:{response.json()},随机睡眠 1-3 分钟"
+                    )
                     # 如果返回空信息,则随机睡眠 1-3 分钟
                     time.sleep(random.randint(60, 180))
                     cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
@@ -119,6 +132,13 @@ class KanyikanRecommend:
                     return
                 for i in range(len(feeds)):
                     try:
+                        AliyunLogger.logging(
+                            code="1001",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message='扫描到一条视频\n'
+                        )
                         video_title = feeds[i].get("title", "").strip().replace("\n", "") \
                             .replace("/", "").replace("\\", "").replace("\r", "") \
                             .replace(":", "").replace("*", "").replace("?", "") \
@@ -166,21 +186,56 @@ class KanyikanRecommend:
                         for k, v in video_dict.items():
                             Common.logger(log_type, crawler).info(f"{k}:{v}")
                         Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
+                        AliyunLogger.logging(
+                            code="1000",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message=f"{video_dict}\n"
+                        )
                         video_percent = '%.2f' % (shared_cnt / playCount)
                         if float(video_percent) < 0.05:
                             Common.logger(log_type, crawler).info(f"分享/播放:{video_percent}\n")
                             Common.logging(log_type, crawler, env, f"分享/播放:{video_percent}\n")
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message=f"不符合抓取条件,分享/播放:{video_percent}\n"
+                            )
                             continue
                         elif shared_cnt < 800:
                             Common.logger(log_type, crawler).info(f"播放量:{playCount}\n")
                             Common.logging(log_type, crawler, env, f"播放量:{playCount}\n")
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message=f"不符合抓取条件,播放量:{playCount}\n"
+                            )
                             continue
                         if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
                             Common.logger(log_type, crawler).info("无效视频\n")
                             Common.logging(log_type, crawler, env, "无效视频\n")
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message=f"无效视频"
+                            )
                         elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                             Common.logger(log_type, crawler).info("不满足抓取规则\n")
                             Common.logging(log_type, crawler, env, "不满足抓取规则\n")
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='不满足抓取规则\n'
+                            )
                         elif any(str(word) if str(word) in video_dict["video_title"] else False
                                  for word in get_config_from_mysql(log_type=log_type,
                                                                    source=crawler,
@@ -189,9 +244,23 @@ class KanyikanRecommend:
                                                                    action="")) is True:
                             Common.logger(log_type, crawler).info('已中过滤词\n')
                             Common.logging(log_type, crawler, env, '已中过滤词\n')
+                            AliyunLogger.logging(
+                                code="2004",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='已中过滤词\n'
+                            )
                         elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
                             Common.logger(log_type, crawler).info('视频已下载\n')
                             Common.logging(log_type, crawler, env, '视频已下载\n')
+                            AliyunLogger.logging(
+                                code="2002",
+                                platform=crawler,
+                                mode=log_type,
+                                env=env,
+                                message='视频已下载\n'
+                            )
 
                         else:
                             video_dict["out_user_id"] = video_dict["user_id"]
@@ -209,9 +278,23 @@ class KanyikanRecommend:
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
                         Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
+                        AliyunLogger.logging(
+                            code="3000",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message=f"抓取单条视频异常:{e}\n"
+                        )
         except Exception as e:
             Common.logger(log_type, crawler).error(f"抓取列表页时异常:{e}\n")
             Common.logging(log_type, crawler, env, f"抓取列表页时异常:{e}\n")
+            AliyunLogger.logging(
+                code="3000",
+                platform=crawler,
+                mode=log_type,
+                env=env,
+                message=f"抓取列表页时异常:{e}\n"
+            )
 
 
 if __name__ == "__main__":