@@ -111,15 +111,19 @@ class XiaoniangaoHourScheduling:
         r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
         if 'data' not in r.text or r.status_code != 200:
             Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+            Common.logging(log_type, crawler, env, f"get_videoList:{r.text}\n")
             return
         elif "data" not in r.json():
             Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()}\n")
+            Common.logging(log_type, crawler, env, f"get_videoList:{r.json()}\n")
             return
         elif "list" not in r.json()["data"]:
             Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}\n")
+            Common.logging(log_type, crawler, env, f"get_videoList:{r.json()['data']}\n")
             return
         elif len(r.json()['data']['list']) == 0:
             Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}\n")
+            Common.logging(log_type, crawler, env, f"get_videoList:{r.json()['data']['list']}\n")
             return
         else:
             # Video list data
@@ -168,13 +172,15 @@ class XiaoniangaoHourScheduling:
                 }
                 for k, v in video_dict.items():
                     Common.logger(log_type, crawler).info(f"{k}:{v}")
-
+                Common.logging(log_type, crawler, env, f"{video_dict}")
                 # Filter out invalid videos
                 if video_title == "" or video_dict["video_id"] == "" or video_dict["video_url"] == "":
                     Common.logger(log_type, crawler).warning("Invalid video\n")
+                    Common.logging(log_type, crawler, env, "Invalid video\n")
                 # Filter by the base crawl rules
                 elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                     Common.logger(log_type, crawler).info("Does not meet the crawl rules\n")
+                    Common.logging(log_type, crawler, env, "Does not meet the crawl rules\n")
                 elif any(str(word) if str(word) in video_dict["video_title"] else False
                          for word in get_config_from_mysql(log_type=log_type,
                                                            source=crawler,
@@ -182,8 +188,10 @@ class XiaoniangaoHourScheduling:
                                                            text="filter",
                                                            action="")) is True:
                     Common.logger(log_type, crawler).info('Hit a filter word\n')
+                    Common.logging(log_type, crawler, env, 'Hit a filter word\n')
                 elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                     Common.logger(log_type, crawler).info('Video already downloaded\n')
+                    Common.logging(log_type, crawler, env, 'Video already downloaded\n')
                 else:
                     # Write into the hourly feeds database table
                     insert_sql = f""" insert into crawler_xiaoniangao_hour(profile_id,
@@ -214,10 +222,13 @@ class XiaoniangaoHourScheduling:
                     "{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))}"
                     )"""
                     Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+                    Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
                     MysqlHelper.update_values(log_type, crawler, insert_sql, env)
                     Common.logger(log_type, crawler).info('Video info written to the hourly database!\n')
+                    Common.logging(log_type, crawler, env, 'Video info written to the hourly database!\n')
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"Exception while crawling a single video: {e}\n")
+                Common.logging(log_type, crawler, env, f"Exception while crawling a single video: {e}\n")

     @classmethod
     def get_video_info(cls, log_type, crawler, p_id, p_mid, v_title, v_id):
@@ -326,6 +337,7 @@ class XiaoniangaoHourScheduling:
         update_video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env)
         if len(update_video_list) == 0:
             Common.logger(log_type, crawler).info("No hourly-ranking data to update\n")
+            Common.logging(log_type, crawler, env, "No hourly-ranking data to update\n")
             return
         for update_video_info in update_video_list:
             try:
@@ -342,6 +354,7 @@ class XiaoniangaoHourScheduling:
                                                      v_id=video_id)
                 ten_play_cnt = video_info_dict['play_cnt']
                 Common.logger(log_type, crawler).info(f"ten_play_cnt:{ten_play_cnt}")
+                Common.logging(log_type, crawler, env, f"ten_play_cnt:{ten_play_cnt}")
                 update_sql = f""" update crawler_xiaoniangao_hour set ten_play_cnt={ten_play_cnt} WHERE out_video_id="{video_id}"; """
                 # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                 MysqlHelper.update_values(log_type, crawler, update_sql, env)
@@ -361,6 +374,7 @@ class XiaoniangaoHourScheduling:
                                                      v_id=video_id)
                 fifteen_play_cnt = video_info_dict['play_cnt']
                 Common.logger(log_type, crawler).info(f"fifteen_play_cnt:{fifteen_play_cnt}")
+                Common.logging(log_type, crawler, env, f"fifteen_play_cnt:{fifteen_play_cnt}")
                 update_sql = f""" update crawler_xiaoniangao_hour set fifteen_play_cnt={fifteen_play_cnt} WHERE out_video_id="{video_id}"; """
                 # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                 MysqlHelper.update_values(log_type, crawler, update_sql, env)
@@ -380,6 +394,7 @@ class XiaoniangaoHourScheduling:
                                                      v_id=video_id)
                 twenty_play_cnt = video_info_dict['play_cnt']
                 Common.logger(log_type, crawler).info(f"twenty_play_cnt:{twenty_play_cnt}")
+                Common.logging(log_type, crawler, env, f"twenty_play_cnt:{twenty_play_cnt}")
                 update_sql = f""" update crawler_xiaoniangao_hour set twenty_play_cnt={twenty_play_cnt} WHERE out_video_id="{video_id}"; """
                 # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                 MysqlHelper.update_values(log_type, crawler, update_sql, env)
@@ -394,6 +409,7 @@ class XiaoniangaoHourScheduling:
                 pass
             except Exception as e:
                 Common.logger(log_type, crawler).error(f'Exception while updating {update_video_info["video_title"]}: {e}\n')
+                Common.logging(log_type, crawler, env, f'Exception while updating {update_video_info["video_title"]}: {e}\n')

     @classmethod
     def download(cls, log_type, crawler, video_info_dict, rule_dict, our_uid, env):
@@ -406,11 +422,13 @@ class XiaoniangaoHourScheduling:
                 # Delete the video folder
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("Video size = 0, deleted successfully\n")
+                Common.logging(log_type, crawler, env, "Video size = 0, deleted successfully\n")
                 return
         except FileNotFoundError:
             # Delete the video folder
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("Video file missing, folder deleted successfully\n")
+            Common.logging(log_type, crawler, env, "Video file missing, folder deleted successfully\n")
             return
         # Download the cover image
         Common.download_method(log_type=log_type, crawler=crawler, text="cover", title=video_info_dict["video_title"],
@@ -420,6 +438,7 @@ class XiaoniangaoHourScheduling:

         # Upload the video
         Common.logger(log_type, crawler).info("Start uploading the video...")
+        Common.logging(log_type, crawler, env, "Start uploading the video...")
         if env == "dev":
             oss_endpoint = "out"
             our_video_id = Publish.upload_and_publish(log_type=log_type,
@@ -477,8 +496,10 @@ class XiaoniangaoHourScheduling:
                                                 {int(video_info_dict['video_width'])},
                                                 {int(video_info_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('Video info inserted into the database!')
+        Common.logging(log_type, crawler, env, 'Video info inserted into the database!')

         # Write the video info to Feishu
         Feishu.insert_columns(log_type, crawler, "yatRv2", "ROWS", 1, 2)
@@ -505,16 +526,18 @@ class XiaoniangaoHourScheduling:
         time.sleep(1)
         Feishu.update_values(log_type, crawler, "yatRv2", "F2:Z2", values)
         Common.logger(log_type, crawler).info('Video info written to Feishu\n')
+        Common.logging(log_type, crawler, env, 'Video info written to Feishu\n')

     # Download / upload
     @classmethod
     def download_publish(cls, log_type, crawler, video_info_dict, rule_dict, update_video_info, our_uid, env):
         if cls.repeat_video(log_type, crawler, video_info_dict["video_id"], env) != 0:
             Common.logger(log_type, crawler).info('Video already downloaded\n')
+            Common.logging(log_type, crawler, env, 'Video already downloaded\n')
         # Play count >= 30000: download directly
         elif int(video_info_dict["play_cnt"]) >= 30000:
-            Common.logger(log_type, crawler).info(
-                f"Play count: {video_info_dict['play_cnt']} >= 30000, meets the download rule, start downloading the video")
+            Common.logger(log_type, crawler).info(f"Play count: {video_info_dict['play_cnt']} >= 30000, meets the download rule, start downloading the video")
+            Common.logging(log_type, crawler, env, f"Play count: {video_info_dict['play_cnt']} >= 30000, meets the download rule, start downloading the video")
             cls.download(log_type=log_type,
                          crawler=crawler,
                          video_info_dict=video_info_dict,
@@ -525,9 +548,10 @@ class XiaoniangaoHourScheduling:
         # Rising-rank logic: rise in any single slot >= 3000, or rises in two slots >= 1000 each
         elif int(update_video_info['ten_play_cnt']) >= 3000 or int(
                 update_video_info['fifteen_play_cnt']) >= 3000 or int(update_video_info['twenty_play_cnt']) >= 3000:
-            Common.logger(log_type, crawler).info(
-                f"10:00 or 15:00 or 20:00 play-count rise: {int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
+            Common.logger(log_type, crawler).info(f"10:00 or 15:00 or 20:00 play-count rise: {int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
+            Common.logging(log_type, crawler, env, f"10:00 or 15:00 or 20:00 play-count rise: {int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
             Common.logger(log_type, crawler).info("Meets the download rule, start downloading the video")
+            Common.logging(log_type, crawler, env, "Meets the download rule, start downloading the video")
             cls.download(log_type=log_type,
                          crawler=crawler,
                          video_info_dict=video_info_dict,
@@ -536,9 +560,10 @@ class XiaoniangaoHourScheduling:
                          env=env)

         elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['fifteen_play_cnt']) >= 1000:
-            Common.logger(log_type, crawler).info(
-                f"10:00 and 15:00 play-count rise: {int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
+            Common.logger(log_type, crawler).info(f"10:00 and 15:00 play-count rise: {int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
+            Common.logging(log_type, crawler, env, f"10:00 and 15:00 play-count rise: {int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
             Common.logger(log_type, crawler).info("Meets the download rule, start downloading the video")
+            Common.logging(log_type, crawler, env, "Meets the download rule, start downloading the video")
             cls.download(log_type=log_type,
                          crawler=crawler,
                          video_info_dict=video_info_dict,
@@ -549,7 +574,9 @@ class XiaoniangaoHourScheduling:
         elif int(update_video_info['fifteen_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
             Common.logger(log_type, crawler).info(
                 f"15:00 and 20:00 play-count rise: {int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
+            Common.logging(log_type, crawler, env, f"15:00 and 20:00 play-count rise: {int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
             Common.logger(log_type, crawler).info("Meets the download rule, start downloading the video")
+            Common.logging(log_type, crawler, env, "Meets the download rule, start downloading the video")
             cls.download(log_type=log_type,
                          crawler=crawler,
                          video_info_dict=video_info_dict,
@@ -560,7 +587,9 @@ class XiaoniangaoHourScheduling:
         elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
             Common.logger(log_type, crawler).info(
                 f"Today's 10:00 / 20:00 play-count rise: {int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
+            Common.logging(log_type, crawler, env, f"Today's 10:00 / 20:00 play-count rise: {int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
             Common.logger(log_type, crawler).info("Meets the download rule, start downloading the video")
+            Common.logging(log_type, crawler, env, "Meets the download rule, start downloading the video")
             cls.download(log_type=log_type,
                          crawler=crawler,
                          video_info_dict=video_info_dict,
@@ -570,6 +599,7 @@ class XiaoniangaoHourScheduling:

         else:
             Common.logger(log_type, crawler).info("Rise does not meet the download rule")
+            Common.logging(log_type, crawler, env, "Rise does not meet the download rule")


 if __name__ == "__main__":
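
Every `Common.logger(...)` call in this patch gains a parallel `Common.logging(log_type, crawler, env, ...)` call, threading `env` through so the same records can reach an environment-aware sink. The helper's body is not shown in this diff; the sketch below is a minimal stand-in that only mirrors the call shape used above — the logger naming and file layout are assumptions, not the project's actual implementation.

```python
import logging
import os


class Common:
    # Minimal sketch, NOT the real Common.logging: it only matches the
    # (log_type, crawler, env, message) call shape the patch relies on.
    @classmethod
    def logging(cls, log_type, crawler, env, message):
        logger = logging.getLogger(f"{crawler}.{env}.{log_type}")  # hypothetical naming
        if not logger.handlers:
            os.makedirs(f"./{crawler}/logs", exist_ok=True)  # assumed log directory
            handler = logging.FileHandler(f"./{crawler}/logs/{env}-{log_type}.log")
            handler.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s"))
            logger.addHandler(handler)
            logger.setLevel(logging.INFO)
        logger.info(str(message).strip())
```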
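For reference only — not part of the patch — the branching in `download_publish` reduces to a single predicate. The helper name below is hypothetical; the thresholds are copied from the code above (absolute play count 30000, single-slot rise 3000, paired-slot rise 1000).

```python
# Hypothetical condensation of download_publish's decision, not code from the patch.
def should_download(play_cnt: int, ten: int, fifteen: int, twenty: int) -> bool:
    if play_cnt >= 30000:                  # high absolute play count
        return True
    if max(ten, fifteen, twenty) >= 3000:  # sharp rise in any single slot
        return True
    # steady rise across any two of the three slots
    pairs = ((ten, fifteen), (fifteen, twenty), (ten, twenty))
    return any(a >= 1000 and b >= 1000 for a, b in pairs)
```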