Bläddra i källkod

看一看plus调试

zhangyong 1 år sedan
förälder
incheckning
21f30de051
1 ändrade filer med 123 tillägg och 99 borttagningar
  1. +123 -99
      kanyikan/kanyikan_recommend/kanyikan_recommend_plus.py

+ 123 - 99
kanyikan/kanyikan_recommend/kanyikan_recommend_plus.py

@@ -5,6 +5,8 @@ import os
 import random
 import sys
 import time
+from datetime import datetime
+
 import requests
 import urllib3
 
@@ -15,6 +17,8 @@ from common.common import Common
 from common.scheduling_db import MysqlHelper
 from common import AliyunLogger
 from common.public import get_config_from_mysql, download_rule
+from common.feishu import Feishu
+
 proxies = {"http": None, "https": None}
 
 
@@ -188,105 +192,125 @@ class KanyikanRecommend:
                             "video_url": video_url,
                             "session": session,
                         }
-                        for k, v in video_dict.items():
-                            Common.logger(log_type, crawler).info(f"{k}:{v}")
-                        Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
-                        AliyunLogger.logging(
-                            code="1000",
-                            platform=crawler,
-                            mode=log_type,
-                            env=env,
-                            message=f"{video_dict}\n"
-                        )
-                        video_percent = '%.2f' % (shared_cnt / playCount)
-                        if float(video_percent) < 0.05:
-                            Common.logger(log_type, crawler).info(f"分享/播放:{video_percent}\n")
-                            Common.logging(log_type, crawler, env, f"分享/播放:{video_percent}\n")
-                            AliyunLogger.logging(
-                                code="2004",
-                                platform=crawler,
-                                mode=log_type,
-                                env=env,
-                                message=f"不符合抓取条件,分享/播放:{video_percent}\n"
-                            )
-                            continue
-                        elif shared_cnt < 800:
-                            Common.logger(log_type, crawler).info(f"播放量:{playCount}\n")
-                            Common.logging(log_type, crawler, env, f"播放量:{playCount}\n")
-                            AliyunLogger.logging(
-                                code="2004",
-                                platform=crawler,
-                                mode=log_type,
-                                env=env,
-                                message=f"不符合抓取条件,播放量:{playCount}\n"
-                            )
-                            continue
-                        if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
-                            Common.logger(log_type, crawler).info("无效视频\n")
-                            Common.logging(log_type, crawler, env, "无效视频\n")
-                            AliyunLogger.logging(
-                                code="2004",
-                                platform=crawler,
-                                mode=log_type,
-                                env=env,
-                                message=f"无效视频"
-                            )
-                        elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
-                            Common.logger(log_type, crawler).info("不满足抓取规则\n")
-                            Common.logging(log_type, crawler, env, "不满足抓取规则\n")
-                            AliyunLogger.logging(
-                                code="2004",
-                                platform=crawler,
-                                mode=log_type,
-                                env=env,
-                                message='不满足抓取规则\n'
-                            )
-                        elif any(str(word) if str(word) in video_dict["video_title"] else False
-                                 for word in get_config_from_mysql(log_type=log_type,
-                                                                   source=crawler,
-                                                                   env=env,
-                                                                   text="filter",
-                                                                   action="")) is True:
-                            Common.logger(log_type, crawler).info('已中过滤词\n')
-                            Common.logging(log_type, crawler, env, '已中过滤词\n')
-                            AliyunLogger.logging(
-                                code="2004",
-                                platform=crawler,
-                                mode=log_type,
-                                env=env,
-                                message='已中过滤词\n'
-                            )
-                        elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
-                            Common.logger(log_type, crawler).info('视频已下载\n')
-                            Common.logging(log_type, crawler, env, '视频已下载\n')
-                            AliyunLogger.logging(
-                                code="2002",
-                                platform=crawler,
-                                mode=log_type,
-                                env=env,
-                                message='视频已下载\n'
-                            )
-
-                        else:
-                            video_dict["out_user_id"] = video_dict["user_id"]
-                            video_dict["platform"] = crawler
-                            video_dict["strategy"] = log_type
-                            video_dict["strategy_type"] = "data"
-                            video_dict["out_video_id"] = video_dict["video_id"]
-                            video_dict["width"] = video_dict["video_width"]
-                            video_dict["height"] = video_dict["video_height"]
-                            video_dict["crawler_rule"] = json.dumps(rule_dict)
-                            video_dict["user_id"] = our_uid
-                            video_dict["publish_time"] = video_dict["publish_time_str"]
-                            cls.insert_video_id(log_type, crawler, video_id, env)
-                            AliyunLogger.logging(
-                                code="1010",
-                                platform=crawler,
-                                mode=log_type,
-                                env=env,
-                                message=f"看一看video_id:{video_id}入库",
-                            )
-                            mq.send_msg(video_dict)
+                        # 获取当前时间
+                        current_time = datetime.now()
+                        formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
+                        values = [[
+                            videoId,
+                            publish_time_str,
+                            video_title,
+                            feeds[i].get("playCount", 0),
+                            feeds[i].get("liked_cnt", 0),
+                            feeds[i].get("comment_cnt", 0),
+                            feeds[i].get("shared_cnt", 0),
+                            feeds[i].get("mediaDuration", 0),
+                            publish_time_str,
+                            formatted_time,
+                            feeds[i].get("thumbUrl", ""),
+                            video_url
+                        ]]
+                        Feishu.insert_columns('kanyikan', 'kanyikan', "yQzAil", "ROWS", 1, 2)
+                        time.sleep(0.5)
+                        Feishu.update_values('kanyikan', 'kanyikan', "yQzAil", "A2:Z2", values)
+                        # for k, v in video_dict.items():
+                        #     Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        # Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
+                        # AliyunLogger.logging(
+                        #     code="1000",
+                        #     platform=crawler,
+                        #     mode=log_type,
+                        #     env=env,
+                        #     message=f"{video_dict}\n"
+                        # )
+                        # video_percent = '%.2f' % (shared_cnt / playCount)
+                        # if float(video_percent) < 0.05:
+                        #     Common.logger(log_type, crawler).info(f"分享/播放:{video_percent}\n")
+                        #     Common.logging(log_type, crawler, env, f"分享/播放:{video_percent}\n")
+                        #     AliyunLogger.logging(
+                        #         code="2004",
+                        #         platform=crawler,
+                        #         mode=log_type,
+                        #         env=env,
+                        #         message=f"不符合抓取条件,分享/播放:{video_percent}\n"
+                        #     )
+                        #     continue
+                        # elif shared_cnt < 800:
+                        #     Common.logger(log_type, crawler).info(f"播放量:{playCount}\n")
+                        #     Common.logging(log_type, crawler, env, f"播放量:{playCount}\n")
+                        #     AliyunLogger.logging(
+                        #         code="2004",
+                        #         platform=crawler,
+                        #         mode=log_type,
+                        #         env=env,
+                        #         message=f"不符合抓取条件,播放量:{playCount}\n"
+                        #     )
+                        #     continue
+                        # if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
+                        #     Common.logger(log_type, crawler).info("无效视频\n")
+                        #     Common.logging(log_type, crawler, env, "无效视频\n")
+                        #     AliyunLogger.logging(
+                        #         code="2004",
+                        #         platform=crawler,
+                        #         mode=log_type,
+                        #         env=env,
+                        #         message=f"无效视频"
+                        #     )
+                        # elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
+                        #     Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                        #     Common.logging(log_type, crawler, env, "不满足抓取规则\n")
+                        #     AliyunLogger.logging(
+                        #         code="2004",
+                        #         platform=crawler,
+                        #         mode=log_type,
+                        #         env=env,
+                        #         message='不满足抓取规则\n'
+                        #     )
+                        # elif any(str(word) if str(word) in video_dict["video_title"] else False
+                        #          for word in get_config_from_mysql(log_type=log_type,
+                        #                                            source=crawler,
+                        #                                            env=env,
+                        #                                            text="filter",
+                        #                                            action="")) is True:
+                        #     Common.logger(log_type, crawler).info('已中过滤词\n')
+                        #     Common.logging(log_type, crawler, env, '已中过滤词\n')
+                        #     AliyunLogger.logging(
+                        #         code="2004",
+                        #         platform=crawler,
+                        #         mode=log_type,
+                        #         env=env,
+                        #         message='已中过滤词\n'
+                        #     )
+                        # elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
+                        #     Common.logger(log_type, crawler).info('视频已下载\n')
+                        #     Common.logging(log_type, crawler, env, '视频已下载\n')
+                        #     AliyunLogger.logging(
+                        #         code="2002",
+                        #         platform=crawler,
+                        #         mode=log_type,
+                        #         env=env,
+                        #         message='视频已下载\n'
+                        #     )
+                        #
+                        # else:
+                        #     video_dict["out_user_id"] = video_dict["user_id"]
+                        #     video_dict["platform"] = crawler
+                        #     video_dict["strategy"] = log_type
+                        #     video_dict["strategy_type"] = "data"
+                        #     video_dict["out_video_id"] = video_dict["video_id"]
+                        #     video_dict["width"] = video_dict["video_width"]
+                        #     video_dict["height"] = video_dict["video_height"]
+                        #     video_dict["crawler_rule"] = json.dumps(rule_dict)
+                        #     video_dict["user_id"] = our_uid
+                        #     video_dict["publish_time"] = video_dict["publish_time_str"]
+                        #     cls.insert_video_id(log_type, crawler, video_id, env)
+                        #     AliyunLogger.logging(
+                        #         code="1010",
+                        #         platform=crawler,
+                        #         mode=log_type,
+                        #         env=env,
+                        #         message=f"看一看video_id:{video_id}入库",
+                        #     )
+                        #     mq.send_msg(video_dict)
                         time.sleep(random.randint(10, 15))
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")