@@ -7,7 +7,10 @@ import sys
 import time
 import requests
 import urllib3
+
+
 sys.path.append(os.getcwd())
+from common import AliyunLogger
 from common.mq import MQ
 from common.common import Common
 from common.scheduling_db import MysqlHelper
@@ -59,12 +62,26 @@ class KanyikanRecommend:
             if "data" not in response.text:
                 Common.logger(log_type, crawler).info("获取视频list时,session过期,随机睡眠 31-50 秒")
                 Common.logging(log_type, crawler, env, "获取视频list时,session过期,随机睡眠 31-50 秒")
+                AliyunLogger.logging(
+                    code="2000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"获取视频list时,session过期,随机睡眠 31-50 秒"
+                )
                 # 如果返回空信息,则随机睡眠 31-40 秒
                 time.sleep(random.randint(31, 40))
                 cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
             elif "items" not in response.json()["data"]:
                 Common.logger(log_type, crawler).info(f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
                 Common.logging(log_type, crawler, env, f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
+                AliyunLogger.logging(
+                    code="2000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"get_feeds:{response.json()},随机睡眠 1-3 分钟"
+                )
                 # 如果返回空信息,则随机睡眠 1-3 分钟
                 time.sleep(random.randint(60, 180))
                 cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
@@ -76,8 +93,6 @@ class KanyikanRecommend:
     def get_videoList(cls, log_type, crawler, our_uid, rule_dict, env):
         mq = MQ(topic_name="topic_crawler_etl_" + env)
         try:
-            Common.logger(log_type, crawler).info(f"正在抓取列表页")
-            Common.logging(log_type, crawler, env, f"正在抓取列表页")
             session = Common.get_session(log_type, crawler, env)
             if session is None:
                 time.sleep(1)
@@ -124,12 +139,26 @@ class KanyikanRecommend:
             if "data" not in response.text:
                 Common.logger(log_type, crawler).info("获取视频list时,session过期,随机睡眠 31-50 秒")
                 Common.logging(log_type, crawler, env, "获取视频list时,session过期,随机睡眠 31-50 秒")
+                AliyunLogger.logging(
+                    code="2000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"获取视频list时,session过期,随机睡眠 31-50 秒"
+                )
                 # 如果返回空信息,则随机睡眠 31-40 秒
                 time.sleep(random.randint(31, 40))
                 cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
             elif "items" not in response.json()["data"]:
                 Common.logger(log_type, crawler).info(f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
                 Common.logging(log_type, crawler, env, f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
+                AliyunLogger.logging(
+                    code="2000",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"get_feeds:{response.json()},随机睡眠 1-3 分钟"
+                )
                 # 如果返回空信息,则随机睡眠 1-3 分钟
                 time.sleep(random.randint(60, 180))
                 cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
@@ -137,9 +166,23 @@ class KanyikanRecommend:
             if feeds == "":
                 Common.logger(log_type, crawler).info(f"feeds:{feeds}")
                 Common.logging(log_type, crawler, env, f"feeds:{feeds}")
+                AliyunLogger.logging(
+                    code="2001",
+                    platform=crawler,
+                    mode=log_type,
+                    env=env,
+                    message=f"没有更多视频啦 ~\n"
+                )
                 return
             for j in range(len(feeds)):
                 try:
+                    AliyunLogger.logging(
+                        code="1001",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message='扫描到一条视频\n'
+                    )
                     video_title = feeds[j].get("title", "").strip().replace("\n", "") \
                         .replace("/", "").replace("\\", "").replace("\r", "") \
                         .replace(":", "").replace("*", "").replace("?", "")
@@ -191,15 +234,36 @@ class KanyikanRecommend:
                     for k, v in video_dict.items():
                         Common.logger(log_type, crawler).info(f"{k}:{v}")
                     Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
+                    AliyunLogger.logging(
+                        code="1000",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"{video_dict}\n"
+                    )

                     if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict[
                         "video_url"] == "":
                         Common.logger(log_type, crawler).info("无效视频\n")
                         Common.logging(log_type, crawler, env, "无效视频\n")
+                        AliyunLogger.logging(
+                            code="2004",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message='无效视频\n'
+                        )
                     elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict,
                                        rule_dict=rule_dict) is False:
                         Common.logger(log_type, crawler).info("不满足抓取规则\n")
                         Common.logging(log_type, crawler, env, "不满足抓取规则\n")
+                        AliyunLogger.logging(
+                            code="2004",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message='不满足抓取规则\n'
+                        )
                     elif any(str(word) if str(word) in video_dict["video_title"] else False
                              for word in get_config_from_mysql(log_type=log_type,
                                                                source=crawler,
@@ -208,9 +272,23 @@ class KanyikanRecommend:
                                                                action="")) is True:
                         Common.logger(log_type, crawler).info('已中过滤词\n')
                         Common.logging(log_type, crawler, env, '已中过滤词\n')
+                        AliyunLogger.logging(
+                            code="2004",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message='已中过滤词\n'
+                        )
                     elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
                         Common.logger(log_type, crawler).info('视频已下载\n')
                         Common.logging(log_type, crawler, env, '视频已下载\n')
+                        AliyunLogger.logging(
+                            code="2002",
+                            platform=crawler,
+                            mode=log_type,
+                            env=env,
+                            message='视频已下载\n'
+                        )
                     else:
                         video_dict["out_user_id"] = video_dict["user_id"]
                         video_dict["platform"] = crawler
@@ -226,9 +304,23 @@ class KanyikanRecommend:
                 except Exception as e:
                     Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
                     Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
+                    AliyunLogger.logging(
+                        code="3000",
+                        platform=crawler,
+                        mode=log_type,
+                        env=env,
+                        message=f"抓取单条视频异常:{e}\n"
+                    )
         except Exception as e:
             Common.logger(log_type, crawler).error(f"抓取列表页时异常:{e}\n")
             Common.logging(log_type, crawler, env, f"抓取列表页时异常:{e}\n")
+            AliyunLogger.logging(
+                code="3000",
+                platform=crawler,
+                mode=log_type,
+                env=env,
+                message=f"抓取列表页时异常:{e}\n"
+            )


 if __name__ == "__main__":
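All of the structured-log calls added above share one keyword signature: a string status code, the crawler name passed as `platform`, the log_type passed as `mode`, plus `env` and a free-form `message`. A minimal sketch of that call shape, assuming `common.AliyunLogger` is importable from the project root as in the patch; the concrete values below are placeholders, not taken from the patch:

```python
import os
import sys

sys.path.append(os.getcwd())  # put the project root on the path, as the patched module does
from common import AliyunLogger  # assumed to expose the logging() helper used in the hunks above

# Placeholder values for illustration only.
AliyunLogger.logging(
    code="2000",          # status code as a string; the patch uses 1000/1001, 2000-2004 and 3000
    platform="kanyikan",  # crawler name, passed through as `platform`
    mode="recommend",     # log_type, passed through as `mode`
    env="dev",            # runtime environment, the same value used as the MQ topic suffix
    message="获取视频list时,session过期,随机睡眠 31-50 秒",
)
```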