|
@@ -5,6 +5,8 @@ import os
|
|
|
import random
|
|
|
import sys
|
|
|
import time
|
|
|
+from datetime import datetime
|
|
|
+
|
|
|
import requests
|
|
|
import urllib3
|
|
|
|
|
@@ -15,6 +17,8 @@ from common.common import Common
|
|
|
from common.scheduling_db import MysqlHelper
|
|
|
from common import AliyunLogger
|
|
|
from common.public import get_config_from_mysql, download_rule
|
|
|
+from common.feishu import Feishu
|
|
|
+
|
|
|
proxies = {"http": None, "https": None}
|
|
|
|
|
|
|
|
@@ -188,105 +192,125 @@ class KanyikanRecommend:
|
|
|
"video_url": video_url,
|
|
|
"session": session,
|
|
|
}
|
|
|
- for k, v in video_dict.items():
|
|
|
- Common.logger(log_type, crawler).info(f"{k}:{v}")
|
|
|
- Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
|
|
|
- AliyunLogger.logging(
|
|
|
- code="1000",
|
|
|
- platform=crawler,
|
|
|
- mode=log_type,
|
|
|
- env=env,
|
|
|
- message=f"{video_dict}\n"
|
|
|
- )
|
|
|
- video_percent = '%.2f' % (shared_cnt / playCount)
|
|
|
- if float(video_percent) < 0.05:
|
|
|
- Common.logger(log_type, crawler).info(f"分享/播放:{video_percent}\n")
|
|
|
- Common.logging(log_type, crawler, env, f"分享/播放:{video_percent}\n")
|
|
|
- AliyunLogger.logging(
|
|
|
- code="2004",
|
|
|
- platform=crawler,
|
|
|
- mode=log_type,
|
|
|
- env=env,
|
|
|
- message=f"不符合抓取条件,分享/播放:{video_percent}\n"
|
|
|
- )
|
|
|
- continue
|
|
|
- elif shared_cnt < 800:
|
|
|
- Common.logger(log_type, crawler).info(f"播放量:{playCount}\n")
|
|
|
- Common.logging(log_type, crawler, env, f"播放量:{playCount}\n")
|
|
|
- AliyunLogger.logging(
|
|
|
- code="2004",
|
|
|
- platform=crawler,
|
|
|
- mode=log_type,
|
|
|
- env=env,
|
|
|
- message=f"不符合抓取条件,播放量:{playCount}\n"
|
|
|
- )
|
|
|
- continue
|
|
|
- if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
|
|
|
- Common.logger(log_type, crawler).info("无效视频\n")
|
|
|
- Common.logging(log_type, crawler, env, "无效视频\n")
|
|
|
- AliyunLogger.logging(
|
|
|
- code="2004",
|
|
|
- platform=crawler,
|
|
|
- mode=log_type,
|
|
|
- env=env,
|
|
|
- message=f"无效视频"
|
|
|
- )
|
|
|
- elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
|
|
|
- Common.logger(log_type, crawler).info("不满足抓取规则\n")
|
|
|
- Common.logging(log_type, crawler, env, "不满足抓取规则\n")
|
|
|
- AliyunLogger.logging(
|
|
|
- code="2004",
|
|
|
- platform=crawler,
|
|
|
- mode=log_type,
|
|
|
- env=env,
|
|
|
- message='不满足抓取规则\n'
|
|
|
- )
|
|
|
- elif any(str(word) if str(word) in video_dict["video_title"] else False
|
|
|
- for word in get_config_from_mysql(log_type=log_type,
|
|
|
- source=crawler,
|
|
|
- env=env,
|
|
|
- text="filter",
|
|
|
- action="")) is True:
|
|
|
- Common.logger(log_type, crawler).info('已中过滤词\n')
|
|
|
- Common.logging(log_type, crawler, env, '已中过滤词\n')
|
|
|
- AliyunLogger.logging(
|
|
|
- code="2004",
|
|
|
- platform=crawler,
|
|
|
- mode=log_type,
|
|
|
- env=env,
|
|
|
- message='已中过滤词\n'
|
|
|
- )
|
|
|
- elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
|
|
|
- Common.logger(log_type, crawler).info('视频已下载\n')
|
|
|
- Common.logging(log_type, crawler, env, '视频已下载\n')
|
|
|
- AliyunLogger.logging(
|
|
|
- code="2002",
|
|
|
- platform=crawler,
|
|
|
- mode=log_type,
|
|
|
- env=env,
|
|
|
- message='视频已下载\n'
|
|
|
- )
|
|
|
-
|
|
|
- else:
|
|
|
- video_dict["out_user_id"] = video_dict["user_id"]
|
|
|
- video_dict["platform"] = crawler
|
|
|
- video_dict["strategy"] = log_type
|
|
|
- video_dict["strategy_type"] = "data"
|
|
|
- video_dict["out_video_id"] = video_dict["video_id"]
|
|
|
- video_dict["width"] = video_dict["video_width"]
|
|
|
- video_dict["height"] = video_dict["video_height"]
|
|
|
- video_dict["crawler_rule"] = json.dumps(rule_dict)
|
|
|
- video_dict["user_id"] = our_uid
|
|
|
- video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
- cls.insert_video_id(log_type, crawler, video_id, env)
|
|
|
- AliyunLogger.logging(
|
|
|
- code="1010",
|
|
|
- platform=crawler,
|
|
|
- mode=log_type,
|
|
|
- env=env,
|
|
|
- message=f"看一看video_id:{video_id}入库",
|
|
|
- )
|
|
|
- mq.send_msg(video_dict)
|
|
|
+ # Get the current time
|
|
|
+ current_time = datetime.now()
|
|
|
+ formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
+ values = [[
|
|
|
+ videoId,
|
|
|
+ publish_time_str,
|
|
|
+ video_title,
|
|
|
+ feeds[i].get("playCount", 0),
|
|
|
+ feeds[i].get("liked_cnt", 0),
|
|
|
+ feeds[i].get("comment_cnt", 0),
|
|
|
+ feeds[i].get("shared_cnt", 0),
|
|
|
+ feeds[i].get("mediaDuration", 0),
|
|
|
+ publish_time_str,
|
|
|
+ formatted_time,
|
|
|
+ feeds[i].get("thumbUrl", ""),
|
|
|
+ video_url
|
|
|
+ ]]
|
|
|
+ Feishu.insert_columns('kanyikan', 'kanyikan', "yQzAil", "ROWS", 1, 2)
|
|
|
+ time.sleep(0.5)
|
|
|
+ Feishu.update_values('kanyikan', 'kanyikan', "yQzAil", "A2:Z2", values)
|
|
|
+ # for k, v in video_dict.items():
|
|
|
+ # Common.logger(log_type, crawler).info(f"{k}:{v}")
|
|
|
+ # Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
|
|
|
+ # AliyunLogger.logging(
|
|
|
+ # code="1000",
|
|
|
+ # platform=crawler,
|
|
|
+ # mode=log_type,
|
|
|
+ # env=env,
|
|
|
+ # message=f"{video_dict}\n"
|
|
|
+ # )
|
|
|
+ # video_percent = '%.2f' % (shared_cnt / playCount)
|
|
|
+ # if float(video_percent) < 0.05:
|
|
|
+ # Common.logger(log_type, crawler).info(f"分享/播放:{video_percent}\n")
|
|
|
+ # Common.logging(log_type, crawler, env, f"分享/播放:{video_percent}\n")
|
|
|
+ # AliyunLogger.logging(
|
|
|
+ # code="2004",
|
|
|
+ # platform=crawler,
|
|
|
+ # mode=log_type,
|
|
|
+ # env=env,
|
|
|
+ # message=f"不符合抓取条件,分享/播放:{video_percent}\n"
|
|
|
+ # )
|
|
|
+ # continue
|
|
|
+ # elif shared_cnt < 800:
|
|
|
+ # Common.logger(log_type, crawler).info(f"播放量:{playCount}\n")
|
|
|
+ # Common.logging(log_type, crawler, env, f"播放量:{playCount}\n")
|
|
|
+ # AliyunLogger.logging(
|
|
|
+ # code="2004",
|
|
|
+ # platform=crawler,
|
|
|
+ # mode=log_type,
|
|
|
+ # env=env,
|
|
|
+ # message=f"不符合抓取条件,播放量:{playCount}\n"
|
|
|
+ # )
|
|
|
+ # continue
|
|
|
+ # if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
|
|
|
+ # Common.logger(log_type, crawler).info("无效视频\n")
|
|
|
+ # Common.logging(log_type, crawler, env, "无效视频\n")
|
|
|
+ # AliyunLogger.logging(
|
|
|
+ # code="2004",
|
|
|
+ # platform=crawler,
|
|
|
+ # mode=log_type,
|
|
|
+ # env=env,
|
|
|
+ # message=f"无效视频"
|
|
|
+ # )
|
|
|
+ # elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
|
|
|
+ # Common.logger(log_type, crawler).info("不满足抓取规则\n")
|
|
|
+ # Common.logging(log_type, crawler, env, "不满足抓取规则\n")
|
|
|
+ # AliyunLogger.logging(
|
|
|
+ # code="2004",
|
|
|
+ # platform=crawler,
|
|
|
+ # mode=log_type,
|
|
|
+ # env=env,
|
|
|
+ # message='不满足抓取规则\n'
|
|
|
+ # )
|
|
|
+ # elif any(str(word) if str(word) in video_dict["video_title"] else False
|
|
|
+ # for word in get_config_from_mysql(log_type=log_type,
|
|
|
+ # source=crawler,
|
|
|
+ # env=env,
|
|
|
+ # text="filter",
|
|
|
+ # action="")) is True:
|
|
|
+ # Common.logger(log_type, crawler).info('已中过滤词\n')
|
|
|
+ # Common.logging(log_type, crawler, env, '已中过滤词\n')
|
|
|
+ # AliyunLogger.logging(
|
|
|
+ # code="2004",
|
|
|
+ # platform=crawler,
|
|
|
+ # mode=log_type,
|
|
|
+ # env=env,
|
|
|
+ # message='已中过滤词\n'
|
|
|
+ # )
|
|
|
+ # elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
|
|
|
+ # Common.logger(log_type, crawler).info('视频已下载\n')
|
|
|
+ # Common.logging(log_type, crawler, env, '视频已下载\n')
|
|
|
+ # AliyunLogger.logging(
|
|
|
+ # code="2002",
|
|
|
+ # platform=crawler,
|
|
|
+ # mode=log_type,
|
|
|
+ # env=env,
|
|
|
+ # message='视频已下载\n'
|
|
|
+ # )
|
|
|
+ #
|
|
|
+ # else:
|
|
|
+ # video_dict["out_user_id"] = video_dict["user_id"]
|
|
|
+ # video_dict["platform"] = crawler
|
|
|
+ # video_dict["strategy"] = log_type
|
|
|
+ # video_dict["strategy_type"] = "data"
|
|
|
+ # video_dict["out_video_id"] = video_dict["video_id"]
|
|
|
+ # video_dict["width"] = video_dict["video_width"]
|
|
|
+ # video_dict["height"] = video_dict["video_height"]
|
|
|
+ # video_dict["crawler_rule"] = json.dumps(rule_dict)
|
|
|
+ # video_dict["user_id"] = our_uid
|
|
|
+ # video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
+ # cls.insert_video_id(log_type, crawler, video_id, env)
|
|
|
+ # AliyunLogger.logging(
|
|
|
+ # code="1010",
|
|
|
+ # platform=crawler,
|
|
|
+ # mode=log_type,
|
|
|
+ # env=env,
|
|
|
+ # message=f"看一看video_id:{video_id}入库",
|
|
|
+ # )
|
|
|
+ # mq.send_msg(video_dict)
|
|
|
time.sleep(random.randint(10, 15))
|
|
|
except Exception as e:
|
|
|
Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
|