|
@@ -3,6 +3,7 @@
|
|
|
# @Time: 2022/4/8
|
|
|
import json
|
|
|
import os
|
|
|
+import random
|
|
|
import sys
|
|
|
import time
|
|
|
import requests
|
|
@@ -14,23 +15,43 @@ from main.publish import Publish
|
|
|
proxies = {"http": None, "https": None}
|
|
|
|
|
|
|
|
|
-class DownloadRecommend:
|
|
|
-
|
|
|
+class Recommend:
|
|
|
# 配置微信号
|
|
|
- Referer = Feishu.get_range_value("recommend", "9fTK1f", "C3:C3")[0]
|
|
|
- wesee_openid = Feishu.get_range_value("recommend", "9fTK1f", "C4:C4")[0]
|
|
|
- wesee_openkey = Feishu.get_range_value("recommend", "9fTK1f", "C5:C5")[0]
|
|
|
- wesee_personid = Feishu.get_range_value("recommend", "9fTK1f", "C6:C6")[0]
|
|
|
- wesee_access_token = Feishu.get_range_value("recommend", "9fTK1f", "C7:C7")[0]
|
|
|
- wesee_thr_appid = Feishu.get_range_value("recommend", "9fTK1f", "C8:C8")[0]
|
|
|
-
|
|
|
- # 过滤词库
|
|
|
+ wechat_sheet = Feishu.get_values_batch('recommend', 'weishi', '9fTK1f')
|
|
|
+ Referer = wechat_sheet[2][2]
|
|
|
+ wesee_openid = wechat_sheet[3][2]
|
|
|
+ wesee_openkey = wechat_sheet[4][2]
|
|
|
+ wesee_personid = wechat_sheet[5][2]
|
|
|
+ wesee_access_token = wechat_sheet[6][2]
|
|
|
+ wesee_thr_appid = wechat_sheet[7][2]
|
|
|
+
|
|
|
+ # 已抓取视频数
|
|
|
+ video_count = []
|
|
|
+ crawler_count = 50
|
|
|
+
|
|
|
+ # 标题过滤词库
|
|
|
@classmethod
def video_title_sensitive_words(cls, log_type):
    """
    Collect the title sensitive words stored in the cloud sheet.

    :param log_type: passed through as the first argument of Feishu.get_values_batch
    :return: flat list of every cell value in sheet "2Oxf8C", row by row, with None cells dropped
    """
    # Read the whole sheet once, then flatten it while skipping empty (None) cells
    sheet_rows = Feishu.get_values_batch(log_type, 'weishi', "2Oxf8C")
    return [cell for row in sheet_rows for cell in row if cell is not None]
|
|
|
+
|
|
|
+ # 用户名过滤词库
|
|
|
+ @classmethod
|
|
|
+ def username_sensitive_words(cls, log_type):
|
|
|
+ # 敏感词库列表
|
|
|
+ word_list = []
|
|
|
+ # 从云文档读取所有敏感词,添加到词库列表
|
|
|
+ lists = Feishu.get_values_batch(log_type, 'weishi', "KnVAc2")
|
|
|
for a in lists:
|
|
|
for j in a:
|
|
|
# 过滤空的单元格内容
|
|
@@ -42,27 +63,19 @@ class DownloadRecommend:
|
|
|
|
|
|
# 抓取基础规则
|
|
|
@staticmethod
|
|
|
- def download_rule(d_duration, d_width, d_height, d_play_cnt, d_like_cnt, d_share_cnt):
|
|
|
+ def download_rule(duration, width, height, like_cnt):
|
|
|
"""
|
|
|
下载视频的基本规则
|
|
|
- :param d_duration: 时长
|
|
|
- :param d_width: 宽
|
|
|
- :param d_height: 高
|
|
|
- :param d_play_cnt: 播放量
|
|
|
- :param d_like_cnt: 点赞量
|
|
|
- :param d_share_cnt: 分享量
|
|
|
+ :param duration: 时长
|
|
|
+ :param width: 宽
|
|
|
+ :param height: 高
|
|
|
+ :param like_cnt: 点赞量
|
|
|
:return: 满足规则,返回 True;反之,返回 False
|
|
|
"""
|
|
|
- if int(float(d_duration)) >= 30:
|
|
|
- if int(d_width) >= 720 or int(d_height) >= 720:
|
|
|
- if int(d_play_cnt) >= 0:
|
|
|
- if int(d_like_cnt) >= 0:
|
|
|
- if int(d_share_cnt) >= 0:
|
|
|
- return True
|
|
|
- else:
|
|
|
- return False
|
|
|
- else:
|
|
|
- return False
|
|
|
+ if int(float(duration)) >= 60:
|
|
|
+ if int(width) >= 720 or int(height) >= 720:
|
|
|
+ if int(like_cnt) >= 1000:
|
|
|
+ return True
|
|
|
else:
|
|
|
return False
|
|
|
return False
|
|
@@ -70,7 +83,7 @@ class DownloadRecommend:
|
|
|
|
|
|
# 抓取列表
|
|
|
@classmethod
|
|
|
- def get_feeds(cls):
|
|
|
+ def get_feeds(cls, log_type):
|
|
|
"""
|
|
|
1.从微视小程序首页推荐,获取视频列表
|
|
|
2.先在 https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=caa3fa 中去重
|
|
@@ -112,253 +125,253 @@ class DownloadRecommend:
|
|
|
"mapExt": "{\"imageSize\":\"480\",\"adaptScene\":\"PicHDWebpLimitScene\"}"
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
try:
|
|
|
- urllib3.disable_warnings()
|
|
|
- r = requests.post(headers=headers, url=url, cookies=cookies, json=json_data, proxies=proxies, verify=False)
|
|
|
- response = json.loads(r.content.decode("utf8"))
|
|
|
- feeds = response["rsp_body"]["feeds"]
|
|
|
- for i in range(len(feeds)):
|
|
|
- # 视频标题过滤话题及处理特殊字符
|
|
|
- weishi_title = feeds[i]["desc"]
|
|
|
- title_split1 = weishi_title.split(" #")
|
|
|
- if title_split1[0] != "":
|
|
|
- title1 = title_split1[0]
|
|
|
- else:
|
|
|
- title1 = title_split1[-1]
|
|
|
+ while True:
|
|
|
+ urllib3.disable_warnings()
|
|
|
+ r = requests.post(headers=headers, url=url, cookies=cookies, json=json_data, proxies=proxies,
|
|
|
+ verify=False)
|
|
|
+ response = json.loads(r.content.decode("utf8"))
|
|
|
+ feeds = response["rsp_body"]["feeds"]
|
|
|
+ for i in range(len(feeds)):
|
|
|
+ # 视频标题过滤话题及处理特殊字符
|
|
|
+ weishi_title = feeds[i]["desc"]
|
|
|
+ title_split1 = weishi_title.split(" #")
|
|
|
+ if title_split1[0] != "":
|
|
|
+ title1 = title_split1[0]
|
|
|
+ else:
|
|
|
+ title1 = title_split1[-1]
|
|
|
|
|
|
- title_split2 = title1.split(" #")
|
|
|
- if title_split2[0] != "":
|
|
|
- title2 = title_split2[0]
|
|
|
- else:
|
|
|
- title2 = title_split2[-1]
|
|
|
+ title_split2 = title1.split(" #")
|
|
|
+ if title_split2[0] != "":
|
|
|
+ title2 = title_split2[0]
|
|
|
+ else:
|
|
|
+ title2 = title_split2[-1]
|
|
|
|
|
|
- title_split3 = title2.split("@")
|
|
|
- if title_split3[0] != "":
|
|
|
- title3 = title_split3[0]
|
|
|
- else:
|
|
|
- title3 = title_split3[-1]
|
|
|
- # 视频标题
|
|
|
- video_title = title3.strip().replace("\n", "") \
|
|
|
- .replace("/", "").replace("快手", "").replace(" ", "") \
|
|
|
- .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
|
|
|
- .replace("#", "").replace(".", "。").replace("\\", "") \
|
|
|
- .replace(":", "").replace("*", "").replace("?", "") \
|
|
|
- .replace("?", "").replace('"', "").replace("<", "") \
|
|
|
- .replace(">", "").replace("|", "").replace("微视", "")
|
|
|
-
|
|
|
- # 视频 ID
|
|
|
- if "id" not in feeds[i]["video"]:
|
|
|
- video_id = 0
|
|
|
- else:
|
|
|
- video_id = feeds[i]["video"]["id"]
|
|
|
+ title_split3 = title2.split("@")
|
|
|
+ if title_split3[0] != "":
|
|
|
+ title3 = title_split3[0]
|
|
|
+ else:
|
|
|
+ title3 = title_split3[-1]
|
|
|
+ # 视频标题
|
|
|
+ video_title = title3.strip().replace("\n", "").replace("/", "")\
|
|
|
+ .replace("快手", "").replace(" ", "").replace(" ", "").replace("&NBSP", "")\
|
|
|
+ .replace("\r", "").replace("#", "").replace(".", "。").replace("\\", "").replace(":", "")\
|
|
|
+ .replace("*", "").replace("?", "").replace("?", "").replace('"', "").replace("<", "")\
|
|
|
+ .replace(">", "").replace("|", "").replace("微视", "")[:40]
|
|
|
+
|
|
|
+ # 视频 ID
|
|
|
+ if "id" not in feeds[i]["video"]:
|
|
|
+ video_id = 0
|
|
|
+ else:
|
|
|
+ video_id = feeds[i]["video"]["id"]
|
|
|
|
|
|
- # 播放数
|
|
|
- if "playNum" not in feeds[i]["ugcData"]:
|
|
|
- video_play_cnt = 0
|
|
|
- else:
|
|
|
- video_play_cnt = feeds[i]["ugcData"]["playNum"]
|
|
|
+ # 播放数
|
|
|
+ if "playNum" not in feeds[i]["ugcData"]:
|
|
|
+ video_play_cnt = 0
|
|
|
+ else:
|
|
|
+ video_play_cnt = feeds[i]["ugcData"]["playNum"]
|
|
|
|
|
|
- # 点赞数
|
|
|
- if "dingCount" not in feeds[i]["ugcData"]:
|
|
|
- video_like_cnt = 0
|
|
|
- else:
|
|
|
- video_like_cnt = feeds[i]["ugcData"]["dingCount"]
|
|
|
+ # 点赞数
|
|
|
+ if "dingCount" not in feeds[i]["ugcData"]:
|
|
|
+ video_like_cnt = 0
|
|
|
+ else:
|
|
|
+ video_like_cnt = feeds[i]["ugcData"]["dingCount"]
|
|
|
|
|
|
- # 分享数
|
|
|
- if "shareNum" not in feeds[i]["ugcData"]:
|
|
|
- video_share_cnt = 0
|
|
|
- else:
|
|
|
- video_share_cnt = feeds[i]["ugcData"]["shareNum"]
|
|
|
+ # 分享数
|
|
|
+ if "shareNum" not in feeds[i]["ugcData"]:
|
|
|
+ video_share_cnt = 0
|
|
|
+ else:
|
|
|
+ video_share_cnt = feeds[i]["ugcData"]["shareNum"]
|
|
|
|
|
|
- # 评论数
|
|
|
- if "totalCommentNum" not in feeds[i]["ugcData"]:
|
|
|
- video_comment_cnt = 0
|
|
|
- else:
|
|
|
- video_comment_cnt = feeds[i]["ugcData"]["totalCommentNum"]
|
|
|
+ # 评论数
|
|
|
+ if "totalCommentNum" not in feeds[i]["ugcData"]:
|
|
|
+ video_comment_cnt = 0
|
|
|
+ else:
|
|
|
+ video_comment_cnt = feeds[i]["ugcData"]["totalCommentNum"]
|
|
|
|
|
|
- # 视频时长
|
|
|
- if "duration" not in feeds[i]["video"]:
|
|
|
- video_duration = 0
|
|
|
- else:
|
|
|
- video_duration = int(int(feeds[i]["video"]["duration"]) / 1000)
|
|
|
+ # 视频时长
|
|
|
+ if "duration" not in feeds[i]["video"]:
|
|
|
+ video_duration = 0
|
|
|
+ else:
|
|
|
+ video_duration = int(int(feeds[i]["video"]["duration"]) / 1000)
|
|
|
|
|
|
- # 视频宽高
|
|
|
- if "width" not in feeds[i]["video"] or "height" not in feeds[i]["video"]:
|
|
|
- video_width = 0
|
|
|
- video_height = 0
|
|
|
- video_resolution = str(video_width) + "*" + str(video_height)
|
|
|
- else:
|
|
|
- video_width = feeds[i]["video"]["width"]
|
|
|
- video_height = feeds[i]["video"]["height"]
|
|
|
- video_resolution = str(video_width) + "*" + str(video_height)
|
|
|
+ # 视频宽高
|
|
|
+ if "width" not in feeds[i]["video"] or "height" not in feeds[i]["video"]:
|
|
|
+ video_width = 0
|
|
|
+ video_height = 0
|
|
|
+ video_resolution = str(video_width) + "*" + str(video_height)
|
|
|
+ else:
|
|
|
+ video_width = feeds[i]["video"]["width"]
|
|
|
+ video_height = feeds[i]["video"]["height"]
|
|
|
+ video_resolution = str(video_width) + "*" + str(video_height)
|
|
|
|
|
|
- # 视频发布时间
|
|
|
- if "createTime" not in feeds[i]:
|
|
|
- video_send_time = 0
|
|
|
- else:
|
|
|
- video_send_time = int(feeds[i]["createTime"]) * 1000
|
|
|
+ # 视频发布时间
|
|
|
+ if "createTime" not in feeds[i]:
|
|
|
+ video_send_time = 0
|
|
|
+ else:
|
|
|
+ video_send_time = int(feeds[i]["createTime"]) * 1000
|
|
|
|
|
|
- # 用户昵称
|
|
|
- user_name = feeds[i]["poster"]["nick"].strip().replace("\n", "") \
|
|
|
- .replace("/", "").replace("快手", "").replace(" ", "") \
|
|
|
- .replace(" ", "").replace("&NBSP", "").replace("\r", "").replace("微视", "")
|
|
|
+ # 用户昵称
|
|
|
+ user_name = feeds[i]["poster"]["nick"].strip().replace("\n", "") \
|
|
|
+ .replace("/", "").replace("快手", "").replace(" ", "") \
|
|
|
+ .replace(" ", "").replace("&NBSP", "").replace("\r", "").replace("微视", "")
|
|
|
|
|
|
- # 用户 ID
|
|
|
- user_id = feeds[i]["poster"]["id"]
|
|
|
+ # 用户 ID
|
|
|
+ user_id = feeds[i]["poster"]["id"]
|
|
|
|
|
|
- # 用户头像地址
|
|
|
- if "thumbURL" not in feeds[i]["material"] and "avatar" not in feeds[i]["poster"]:
|
|
|
- head_url = 0
|
|
|
- elif "thumbURL" in feeds[i]["material"]:
|
|
|
- head_url = feeds[i]["material"]["thumbURL"]
|
|
|
- else:
|
|
|
- head_url = feeds[i]["poster"]["avatar"]
|
|
|
+ # 用户头像地址
|
|
|
+ if "thumbURL" not in feeds[i]["material"] and "avatar" not in feeds[i]["poster"]:
|
|
|
+ head_url = 0
|
|
|
+ elif "thumbURL" in feeds[i]["material"]:
|
|
|
+ head_url = feeds[i]["material"]["thumbURL"]
|
|
|
+ else:
|
|
|
+ head_url = feeds[i]["poster"]["avatar"]
|
|
|
|
|
|
- # 视频封面地址
|
|
|
- if len(feeds[i]["images"]) == 0:
|
|
|
- cover_url = 0
|
|
|
- else:
|
|
|
- cover_url = feeds[i]["images"][0]["url"]
|
|
|
+ # 视频封面地址
|
|
|
+ if len(feeds[i]["images"]) == 0:
|
|
|
+ cover_url = 0
|
|
|
+ else:
|
|
|
+ cover_url = feeds[i]["images"][0]["url"]
|
|
|
|
|
|
- # 视频播放地址
|
|
|
- if "url" not in feeds[i]["video"]:
|
|
|
- video_url = 0
|
|
|
- else:
|
|
|
- video_url = feeds[i]["video"]["url"]
|
|
|
-
|
|
|
- Common.logger("recommend").info("video_title:{}".format(video_title))
|
|
|
- Common.logger("recommend").info("video_id:{}".format(video_id))
|
|
|
- Common.logger("recommend").info("video_play_cnt:{}".format(video_play_cnt))
|
|
|
- Common.logger("recommend").info("video_like_cnt:{}".format(video_like_cnt))
|
|
|
- Common.logger("recommend").info("video_share_cnt:{}".format(video_share_cnt))
|
|
|
- # Common.logger("recommend").info("video_comment_cnt:{}".format(video_comment_cnt))
|
|
|
- Common.logger("recommend").info("video_duration:{}秒".format(video_duration))
|
|
|
- # Common.logger("recommend").info("video_resolution:{}".format(video_resolution))
|
|
|
- Common.logger("recommend").info(
|
|
|
- "video_send_time:{}".format(time.strftime(
|
|
|
- "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
|
|
|
- Common.logger("recommend").info("user_name:{}".format(user_name))
|
|
|
- # Common.logger("recommend").info("user_id:{}".format(user_id))
|
|
|
- # Common.logger("recommend").info("head_url:{}".format(head_url))
|
|
|
- # Common.logger("recommend").info("cover_url:{}".format(cover_url))
|
|
|
- Common.logger("recommend").info("video_url:{}".format(video_url))
|
|
|
-
|
|
|
- # 过滤无效视频
|
|
|
- if video_id == 0 or video_duration == 0 or video_send_time == 0 or head_url == 0 \
|
|
|
- or cover_url == 0 or video_url == 0:
|
|
|
- Common.logger("recommend").info("无效视频")
|
|
|
- # 判断基础规则
|
|
|
- elif cls.download_rule(video_duration, video_width, video_height,
|
|
|
- video_play_cnt, video_like_cnt, video_share_cnt) is False:
|
|
|
- Common.logger("recommend").info("不满足基础规则")
|
|
|
- # 判断敏感词
|
|
|
- elif any(word if word in weishi_title else False for word in cls.sensitive_words()) is True:
|
|
|
- Common.logger("recommend").info("视频已中敏感词:{}".format(weishi_title))
|
|
|
- # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=caa3fa
|
|
|
- elif video_id in [j for m in Feishu.get_values_batch("recommend", "caa3fa") for j in m]:
|
|
|
- Common.logger("recommend").info("该视频已下载:{}", video_title)
|
|
|
- # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=O7fCzr
|
|
|
- elif video_id in [j for n in Feishu.get_values_batch("recommend", "O7fCzr") for j in n]:
|
|
|
- Common.logger("recommend").info("该视频已在feeds中:{}", video_title)
|
|
|
- else:
|
|
|
- Common.logger("recommend").info("该视频未下载,添加至feeds中:{}".format(video_title))
|
|
|
- # feeds工作表,插入首行
|
|
|
- time.sleep(1)
|
|
|
- Feishu.insert_columns("recommend", "O7fCzr", "ROWS", 1, 2)
|
|
|
- # 获取当前时间
|
|
|
- get_feeds_time = int(time.time())
|
|
|
- # 工作表 feeds 中写入数据
|
|
|
- values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(get_feeds_time))),
|
|
|
- "推荐榜",
|
|
|
- video_id,
|
|
|
- video_title,
|
|
|
- video_play_cnt,
|
|
|
- video_comment_cnt,
|
|
|
- video_like_cnt,
|
|
|
- video_share_cnt,
|
|
|
- video_duration,
|
|
|
- video_resolution,
|
|
|
- time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time / 1000))),
|
|
|
- user_name,
|
|
|
- user_id,
|
|
|
- head_url,
|
|
|
- cover_url,
|
|
|
- video_url]]
|
|
|
- # 等待 1s,防止操作云文档太频繁,导致报错
|
|
|
- time.sleep(1)
|
|
|
- Feishu.update_values("recommend", "O7fCzr", "A2:P2", values)
|
|
|
+ # 视频播放地址
|
|
|
+ if "url" not in feeds[i]["video"]:
|
|
|
+ video_url = 0
|
|
|
+ else:
|
|
|
+ video_url = feeds[i]["video"]["url"]
|
|
|
+
|
|
|
+ Common.logger(log_type).info("video_title:{}".format(video_title))
|
|
|
+ Common.logger(log_type).info("video_id:{}".format(video_id))
|
|
|
+ Common.logger(log_type).info("video_like_cnt:{}".format(video_like_cnt))
|
|
|
+ Common.logger(log_type).info("video_share_cnt:{}".format(video_share_cnt))
|
|
|
+ Common.logger(log_type).info("video_comment_cnt:{}".format(video_comment_cnt))
|
|
|
+ Common.logger(log_type).info("video_duration:{}秒".format(video_duration))
|
|
|
+ Common.logger(log_type).info(
|
|
|
+ "video_send_time:{}".format(time.strftime(
|
|
|
+ "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
|
|
|
+ Common.logger(log_type).info("user_name:{}".format(user_name))
|
|
|
+ Common.logger(log_type).info("video_url:{}".format(video_url))
|
|
|
+ # Common.logger(log_type).info("video_play_cnt:{}".format(video_play_cnt))
|
|
|
+ # Common.logger(log_type).info("video_resolution:{}".format(video_resolution))
|
|
|
+ # Common.logger(log_type).info("user_id:{}".format(user_id))
|
|
|
+ # Common.logger(log_type).info("head_url:{}".format(head_url))
|
|
|
+ # Common.logger(log_type).info("cover_url:{}".format(cover_url))
|
|
|
+
|
|
|
+ # 过滤无效视频
|
|
|
+ if video_id == 0 or video_duration == 0 or video_send_time == 0 or head_url == 0 \
|
|
|
+ or cover_url == 0 or video_url == 0:
|
|
|
+ Common.logger(log_type).info("无效视频\n")
|
|
|
+ # 判断基础规则
|
|
|
+ elif cls.download_rule(video_duration, video_width, video_height, video_like_cnt) is False:
|
|
|
+ Common.logger(log_type).info("不满足基础规则\n")
|
|
|
+ # 标题敏感词过滤
|
|
|
+ elif any(word if word in weishi_title else False for word in
|
|
|
+ cls.video_title_sensitive_words(log_type)) is True:
|
|
|
+ Common.logger(log_type).info("标题已中敏感词:{}\n".format(weishi_title))
|
|
|
+ # 用户名敏感词过滤
|
|
|
+ elif any(word if word in user_name else False for word in
|
|
|
+ cls.username_sensitive_words(log_type)) is True:
|
|
|
+ Common.logger(log_type).info("用户名已中敏感词:{}\n".format(user_name))
|
|
|
+ # 从已下载云文档去重
|
|
|
+ elif str(video_id) in [j for m in Feishu.get_values_batch(log_type, 'weishi', "caa3fa") for j in m]:
|
|
|
+ Common.logger(log_type).info("视频已下载:{}\n", video_title)
|
|
|
+ # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=O7fCzr
|
|
|
+ elif str(video_id) in [j for n in Feishu.get_values_batch(log_type, 'weishi', "O7fCzr") for j in n]:
|
|
|
+ Common.logger(log_type).info("视频已存在:{}\n", video_title)
|
|
|
+ else:
|
|
|
+ # 添加到已下载视频列表
|
|
|
+ cls.video_count.append(video_id)
|
|
|
+
|
|
|
+ # feeds工作表,插入首行
|
|
|
+ Feishu.insert_columns(log_type, 'weishi', "O7fCzr", "ROWS", 1, 2)
|
|
|
+ # 获取当前时间
|
|
|
+ get_feeds_time = int(time.time())
|
|
|
+ # 工作表 feeds 中写入数据
|
|
|
+ values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(get_feeds_time))),
|
|
|
+ "推荐榜",
|
|
|
+ str(video_id),
|
|
|
+ video_title,
|
|
|
+ int(video_play_cnt),
|
|
|
+ int(video_comment_cnt),
|
|
|
+ int(video_like_cnt),
|
|
|
+ int(video_share_cnt),
|
|
|
+ video_duration,
|
|
|
+ video_resolution,
|
|
|
+ time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time / 1000))),
|
|
|
+ user_name,
|
|
|
+ user_id,
|
|
|
+ head_url,
|
|
|
+ cover_url,
|
|
|
+ video_url]]
|
|
|
+ # 等待 1s,防止操作云文档太频繁,导致报错
|
|
|
+ time.sleep(1)
|
|
|
+ Feishu.update_values(log_type, 'weishi', "O7fCzr", "A2:T2", values)
|
|
|
+ Common.logger(log_type).info("视频保存至云文档成功\n")
|
|
|
+ time.sleep(random.randint(3, 5))
|
|
|
+
|
|
|
+ # 每天抓取 50 条
|
|
|
+ if len(cls.video_count) >= cls.crawler_count:
|
|
|
+ Common.logger(log_type).info("已抓取{}条数据\n", len(cls.video_count))
|
|
|
+ cls.video_count = []
|
|
|
+ return
|
|
|
except Exception as e:
|
|
|
- Common.logger("recommend").error("获取微视视频list异常:{}".format(e))
|
|
|
+ Common.logger(log_type).error("get_feeds异常:{}\n".format(e))
|
|
|
|
|
|
- # 下载/上传视频
|
|
|
+ # 下载/上传
|
|
|
@classmethod
|
|
|
- def download_publish(cls):
|
|
|
+ def download_publish(cls, log_type, env):
|
|
|
try:
|
|
|
- for i in range(1, len(Feishu.get_values_batch("recommend", "O7fCzr")) + 1):
|
|
|
- time.sleep(1)
|
|
|
- download_video_id = Feishu.get_values_batch("recommend", "O7fCzr")[i][2]
|
|
|
- download_video_title = Feishu.get_values_batch("recommend", "O7fCzr")[i][3]
|
|
|
- download_video_play_cnt = Feishu.get_values_batch("recommend", "O7fCzr")[i][4]
|
|
|
- download_video_comment_cnt = Feishu.get_values_batch("recommend", "O7fCzr")[i][5]
|
|
|
- download_video_like_cnt = Feishu.get_values_batch("recommend", "O7fCzr")[i][6]
|
|
|
- download_video_share_cnt = Feishu.get_values_batch("recommend", "O7fCzr")[i][7]
|
|
|
- download_video_duration = Feishu.get_values_batch("recommend", "O7fCzr")[i][8]
|
|
|
- download_video_resolution = Feishu.get_values_batch("recommend", "O7fCzr")[i][9]
|
|
|
- # download_video_width = download_video_resolution.split("*")[0]
|
|
|
- # download_video_height = download_video_resolution.split("*")[-1]
|
|
|
- download_video_send_time = Feishu.get_values_batch("recommend", "O7fCzr")[i][10]
|
|
|
- download_user_name = Feishu.get_values_batch("recommend", "O7fCzr")[i][11]
|
|
|
- download_user_id = Feishu.get_values_batch("recommend", "O7fCzr")[i][12]
|
|
|
- download_head_url = Feishu.get_values_batch("recommend", "O7fCzr")[i][13]
|
|
|
- download_cover_url = Feishu.get_values_batch("recommend", "O7fCzr")[i][14]
|
|
|
- download_video_url = Feishu.get_values_batch("recommend", "O7fCzr")[i][15]
|
|
|
-
|
|
|
- # Common.logger("recommend").info("download_video_id:{}", download_video_id)
|
|
|
- # Common.logger("recommend").info("download_video_title:{}", download_video_title)
|
|
|
- # Common.logger("recommend").info("download_video_play_cnt:{}", download_video_play_cnt)
|
|
|
- # Common.logger("recommend").info("download_video_comment_cnt:{}", download_video_comment_cnt)
|
|
|
- # Common.logger("recommend").info("download_video_like_cnt:{}", download_video_like_cnt)
|
|
|
- # Common.logger("recommend").info("download_video_share_cnt:{}", download_video_share_cnt)
|
|
|
- # Common.logger("recommend").info("download_video_duration:{}", download_video_duration)
|
|
|
- # Common.logger("recommend").info("download_video_resolution:{}", download_video_resolution)
|
|
|
- # Common.logger("recommend").info("download_video_send_time:{}", download_video_send_time)
|
|
|
- # Common.logger("recommend").info("download_user_name:{}", download_user_name)
|
|
|
- # Common.logger("recommend").info("download_user_id:{}", download_user_id)
|
|
|
- # Common.logger("recommend").info("download_head_url:{}", download_head_url)
|
|
|
- # Common.logger("recommend").info("download_cover_url:{}", download_cover_url)
|
|
|
- # Common.logger("recommend").info("download_video_url:{}", download_video_url)
|
|
|
-
|
|
|
- Common.logger("recommend").info("正在判断第{}行,视频:{}", i, download_video_title)
|
|
|
+ recommend_sheet = Feishu.get_values_batch(log_type, 'weishi', "O7fCzr")
|
|
|
+ for i in range(1, len(recommend_sheet)):
|
|
|
+ download_video_id = recommend_sheet[i][2]
|
|
|
+ download_video_title = recommend_sheet[i][3]
|
|
|
+ download_video_play_cnt = recommend_sheet[i][4]
|
|
|
+ download_video_comment_cnt = recommend_sheet[i][5]
|
|
|
+ download_video_like_cnt = recommend_sheet[i][6]
|
|
|
+ download_video_share_cnt = recommend_sheet[i][7]
|
|
|
+ download_video_duration = recommend_sheet[i][8]
|
|
|
+ download_video_resolution = recommend_sheet[i][9]
|
|
|
+ download_video_send_time = recommend_sheet[i][10]
|
|
|
+ download_user_name = recommend_sheet[i][11]
|
|
|
+ download_user_id = recommend_sheet[i][12]
|
|
|
+ download_head_url = recommend_sheet[i][13]
|
|
|
+ download_cover_url = recommend_sheet[i][14]
|
|
|
+ download_video_url = recommend_sheet[i][15]
|
|
|
+
|
|
|
+ # Common.logger(log_type).info("download_video_title:{}", download_video_title)
|
|
|
+ # Common.logger(log_type).info("download_video_id:{}", download_video_id)
|
|
|
+ # Common.logger(log_type).info("download_video_play_cnt:{}", download_video_play_cnt)
|
|
|
+ # Common.logger(log_type).info("download_video_comment_cnt:{}", download_video_comment_cnt)
|
|
|
+ # Common.logger(log_type).info("download_video_share_cnt:{}", download_video_share_cnt)
|
|
|
+ # Common.logger(log_type).info("download_user_name:{}", download_user_name)
|
|
|
+ # Common.logger(log_type).info("download_user_id:{}", download_user_id)
|
|
|
+ # Common.logger(log_type).info("download_head_url:{}", download_head_url)
|
|
|
+ # Common.logger(log_type).info("download_cover_url:{}", download_cover_url)
|
|
|
+
|
|
|
+ Common.logger(log_type).info("正在判断第{}行:{}", i+1, download_video_title)
|
|
|
+ Common.logger(log_type).info("like_cnt:{}", download_video_like_cnt)
|
|
|
+ Common.logger(log_type).info("duration:{}", download_video_duration)
|
|
|
+ Common.logger(log_type).info("resolution:{}", download_video_resolution)
|
|
|
+ Common.logger(log_type).info("send_time:{}", download_video_send_time)
|
|
|
+ Common.logger(log_type).info("video_url:{}", download_video_url)
|
|
|
|
|
|
# 过滤空行
|
|
|
- if download_video_id is None \
|
|
|
- or download_video_id == "" \
|
|
|
- or download_video_title is None \
|
|
|
- or download_video_title == "":
|
|
|
- Common.logger("recommend").warning("空行,删除")
|
|
|
- # 删除行或列,可选 ROWS、COLUMNS
|
|
|
- Feishu.dimension_range("recommend", "O7fCzr", "ROWS", i + 1, i + 1)
|
|
|
- return
|
|
|
- # 分享量>=1000
|
|
|
- elif int(download_video_share_cnt) < 1000:
|
|
|
- Common.logger("recommend").info("分享量:{} < 1000", download_video_share_cnt)
|
|
|
+ if download_video_id is None or download_video_title is None:
|
|
|
# 删除行或列,可选 ROWS、COLUMNS
|
|
|
- Feishu.dimension_range("recommend", "O7fCzr", "ROWS", i + 1, i + 1)
|
|
|
+ Feishu.dimension_range(log_type, 'weishi', "O7fCzr", "ROWS", i + 1, i + 1)
|
|
|
+ Common.logger(log_type).warning("空行,已删除\n")
|
|
|
return
|
|
|
# 去重
|
|
|
- elif download_video_id in [j for m in Feishu.get_values_batch("recommend", "caa3fa") for j in m]:
|
|
|
- Common.logger("recommend").info("该视频已下载:{}", download_video_title)
|
|
|
+ elif download_video_id in [j for m in Feishu.get_values_batch(log_type, 'weishi', "caa3fa") for j in m]:
|
|
|
# 删除行或列,可选 ROWS、COLUMNS
|
|
|
- Feishu.dimension_range("recommend", "O7fCzr", "ROWS", i + 1, i + 1)
|
|
|
+ Feishu.dimension_range(log_type, 'weishi', "O7fCzr", "ROWS", i + 1, i + 1)
|
|
|
+ Common.logger(log_type).info("视频已下载:{}\n", download_video_title)
|
|
|
return
|
|
|
else:
|
|
|
- Common.logger("recommend").info("开始下载视频:{}", download_video_title)
|
|
|
# 下载封面
|
|
|
- Common.download_method(job="recommend", text="cover",
|
|
|
+ Common.download_method(log_type, text="cover",
|
|
|
d_name=str(download_video_title), d_url=str(download_cover_url))
|
|
|
# 下载视频
|
|
|
- Common.download_method(job="recommend", text="video",
|
|
|
+ Common.download_method(log_type, text="video",
|
|
|
d_name=str(download_video_title), d_url=str(download_video_url))
|
|
|
# 保存视频信息至 "./videos/{download_video_title}/info.txt"
|
|
|
with open("./videos/" + download_video_title
|
|
@@ -378,22 +391,23 @@ class DownloadRecommend:
|
|
|
str(download_video_url) + "\n" +
|
|
|
str(download_cover_url) + "\n" +
|
|
|
str(cls.wesee_access_token))
|
|
|
- Common.logger("recommend").info("==========视频信息已保存至info.txt==========")
|
|
|
+ Common.logger(log_type).info("视频信息已保存至info.txt")
|
|
|
|
|
|
# 上传视频
|
|
|
- Common.logger("recommend").info("开始上传视频:{}".format(download_video_title))
|
|
|
- Publish.upload_and_publish("recommend", "prod", "play")
|
|
|
+ Common.logger(log_type).info("开始上传视频:{}".format(download_video_title))
|
|
|
+ our_video_id = Publish.upload_and_publish(log_type, env, "play")
|
|
|
+ our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
|
|
|
+ Common.logger(log_type).info("视频上传完成:{}", download_video_title)
|
|
|
|
|
|
- # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=caa3fa
|
|
|
- Common.logger("recommend").info("保存视频ID至云文档:{}", download_video_title)
|
|
|
# 视频ID工作表,插入首行
|
|
|
- Feishu.insert_columns("recommend", "caa3fa", "ROWS", 1, 2)
|
|
|
+ Feishu.insert_columns(log_type, 'weishi', "caa3fa", "ROWS", 1, 2)
|
|
|
# 视频ID工作表,首行写入数据
|
|
|
upload_time = int(time.time())
|
|
|
values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time))),
|
|
|
"推荐榜",
|
|
|
- str(download_video_id),
|
|
|
str(download_video_title),
|
|
|
+ str(download_video_id),
|
|
|
+ our_video_link,
|
|
|
download_video_play_cnt,
|
|
|
download_video_comment_cnt,
|
|
|
download_video_like_cnt,
|
|
@@ -407,26 +421,40 @@ class DownloadRecommend:
|
|
|
str(download_cover_url),
|
|
|
str(download_video_url)]]
|
|
|
time.sleep(1)
|
|
|
- Feishu.update_values("recommend", "caa3fa", "A2:Q2", values)
|
|
|
+ Feishu.update_values(log_type, 'weishi', "caa3fa", "F2:W2", values)
|
|
|
+ Common.logger(log_type).info("视频已保存至云文档:{}", download_video_title)
|
|
|
|
|
|
# 删除行或列,可选 ROWS、COLUMNS
|
|
|
- Feishu.dimension_range("recommend", "O7fCzr", "ROWS", i + 1, i + 1)
|
|
|
+ Feishu.dimension_range(log_type, 'weishi', "O7fCzr", "ROWS", i + 1, i + 1)
|
|
|
+ Common.logger(log_type).info("视频:{},下载/上传成功\n", download_video_title)
|
|
|
return
|
|
|
except Exception as e:
|
|
|
- Common.logger("recommend").error("下载/上传视频异常:{}", e)
|
|
|
- Feishu.dimension_range("recommend", "O7fCzr", "ROWS", 2, 2)
|
|
|
+ Feishu.dimension_range(log_type, 'weishi', "O7fCzr", "ROWS", 2, 2)
|
|
|
+ Common.logger(log_type).error("download_publish异常,已删除该条数据:{}\n", e)
|
|
|
+
|
|
|
+ # 执行 下载/上传
|
|
|
@classmethod
def run_download_publish(cls, log_type, env):
    """
    Repeatedly call download_publish until the feeds sheet "O7fCzr" has at most one row left.

    :param log_type: passed to Common.logger, Feishu.get_values_batch and download_publish
    :param env: passed unchanged to download_publish
    :return: None; any exception is logged and ends the loop
    """
    try:
        while True:
            # One remaining row means nothing is left to process
            # (presumably the header row -- confirm against the sheet layout).
            # Zero rows is treated the same way: download_publish iterates
            # range(1, len(sheet)) and would do nothing, so testing only for
            # "== 1" would poll the sheet forever.
            if len(Feishu.get_values_batch(log_type, 'weishi', 'O7fCzr')) <= 1:
                Common.logger(log_type).info("下载/上传完成\n")
                break
            cls.download_publish(log_type, env)
            # Random 1-3 second pause between passes
            time.sleep(random.randint(1, 3))
    except Exception as e:
        Common.logger(log_type).error("run_download_publish异常:{}", e)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual entry point: run a single download/publish pass against the dev environment.
    # To crawl the recommend feed instead, call: Recommend.get_feeds('weishi')
    # The class-level credentials (Referer, wesee_openid, wesee_openkey, wesee_personid,
    # wesee_access_token, wesee_thr_appid) can be printed here when debugging.
    Recommend.download_publish('weishi', 'dev')
|