3 年之前 · 3154c2923a
--- a/main/hour_list.py
+++ b/main/hour_list.py
@@ -126,6 +126,23 @@ class HourList:
 
				         else:
			
 
				             Common.logger(log_type).info("今日上升榜日期已存在")
			
 
				 
			
 
				+    # Fetch the emoji and symbol pools used to decorate video titles
				+    @classmethod
				+    def get_expression(cls):
				+        """Read the emoji and symbol columns from the Feishu sheet.
				+
				+        Calls ``Feishu.get_values_batch("hour", "xiaoniangao", "BhlbST")`` and
				+        splits the returned rows by column: non-``None`` cells of the first
				+        column are collected into ``expression_list`` and non-``None`` cells
				+        of the second column into ``char_list``.  A row that is too short to
				+        hold a given column is skipped for that column instead of raising
				+        ``IndexError``.
				+
				+        :return: tuple ``(expression_list, char_list)``
				+        """
				+        expression_list = []
				+        char_list = []
				+        # NOTE(review): assumes get_values_batch returns a list of row lists - confirm
				+        char_sheet = Feishu.get_values_batch("hour", "xiaoniangao", "BhlbST")
				+        # Iterate the rows directly; no index bookkeeping is needed.
				+        for row in char_sheet:
				+            if len(row) > 0 and row[0] is not None:
				+                expression_list.append(row[0])
				+            if len(row) > 1 and row[1] is not None:
				+                char_list.append(row[1])
				+
				+        return expression_list, char_list
			
 
				+
			
 
				     # 获取列表
			
 
				     @classmethod
			
 
				     def get_hour_list_feeds(cls, log_type):
			
@@ -198,194 +215,199 @@ class HourList:
 
				             "wx_ver": "8.0.20",
			
 
				             "code_ver": "3.62.0"
			
 
				         }
			
 
				-        try:
			
 
				-            urllib3.disable_warnings()
			
 
				-            r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
			
 
				-            if "data" not in r.json():
			
 
				-                Common.logger(log_type).warning("获取视频feeds错误:{}", r.text)
			
 
				-            elif "list" not in r.json()["data"]:
			
 
				-                Common.logger(log_type).warning("获取视频feeds无数据，休眠10s:{}", r.json()["data"])
			
 
				-            else:
			
 
				-                # 视频列表数据
			
 
				-                feeds = r.json()["data"]["list"]
			
 
				-                for i in range(len(feeds)):
			
 
				-                    # 标题，表情随机加在片头、片尾，或替代句子中间的标点符号
			
 
				-                    if "title" in feeds[i]:
			
 
				-                        char_sheet = Feishu.get_values_batch("hour", "xiaoniangao", "BhlbST")
			
 
				-                        expression_list = []
			
 
				-                        char_list = []
			
 
				-                        for q in range(len(char_sheet)):
			
 
				-                            if char_sheet[q][0] is not None:
			
 
				-                                expression_list.append(char_sheet[i][0])
			
 
				-                            if char_sheet[q][1] is not None:
			
 
				-                                char_list.append(char_sheet[i][1])
			
 
				-                        befor_video_title = feeds[i]["title"].strip().replace("\n", "") \
			
 
				-                            .replace("/", "").replace("\r", "").replace("#", "") \
			
 
				-                            .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
			
 
				-                            .replace(":", "").replace("*", "").replace("？", "") \
			
 
				-                            .replace("?", "").replace('"', "").replace("<", "") \
			
 
				-                            .replace(">", "").replace("|", "").replace(" ", "").replace("#表情", "").replace("#符号", "")
			
 
				-                        # 随机取一个表情
			
 
				-                        expression = random.choice(expression_list)
			
 
				-                        # 生成标题list[表情+title, title+表情]
			
 
				-                        expression_title_list = [expression + befor_video_title, befor_video_title + expression]
			
 
				-                        # 从标题list中随机取一个标题
			
 
				-                        title_list1 = random.choice(expression_title_list)
			
 
				-                        # 生成标题:原标题+符号
			
 
				-                        title_list2 = befor_video_title + random.choice(char_list)
			
 
				-                        # title_list3 = befor_video_title.replace(
			
 
				-                        #     ",", random.choice(expression_list)).replace("，", random.choice(expression_list))
			
 
				-                        # 表情和标题组合，与标题和符号组合，汇总成待使用的标题列表
			
 
				-                        title_list4 = [title_list1, title_list2]
			
 
				-                        # 最终标题
			
 
				-                        video_title = random.choice(title_list4)
			
 
				-                    else:
			
 
				-                        video_title = 0
			
 
				-
			
 
				-                    # 视频 ID
			
 
				-                    if "vid" in feeds[i]:
			
 
				-                        video_id = feeds[i]["vid"]
			
 
				-                    else:
			
 
				-                        video_id = 0
			
 
				+        # try:
			
 
				+        urllib3.disable_warnings()
			
 
				+        r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
			
 
				+        if "data" not in r.json():
			
 
				+            Common.logger(log_type).warning("获取视频feeds错误:{}", r.text)
			
 
				+        elif "list" not in r.json()["data"]:
			
 
				+            Common.logger(log_type).warning("获取视频feeds无数据，休眠10s:{}", r.json()["data"])
			
 
				+        else:
			
 
				+            # 视频列表数据
			
 
				+            feeds = r.json()["data"]["list"]
			
 
				+            for i in range(len(feeds)):
			
 
				+                # 标题，表情随机加在片头、片尾，或替代句子中间的标点符号
			
 
				+                if "title" in feeds[i]:
			
 
				+                    befor_video_title = feeds[i]["title"].strip().replace("\n", "") \
			
 
				+                        .replace("/", "").replace("\r", "").replace("#", "") \
			
 
				+                        .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
			
 
				+                        .replace(":", "").replace("*", "").replace("？", "") \
			
 
				+                        .replace("?", "").replace('"', "").replace("<", "") \
			
 
				+                        .replace(">", "").replace("|", "").replace(" ", "").replace("#表情", "").replace("#符号", "")
			
 
				+
			
 
				+                    expression = cls.get_expression()
			
 
				+                    expression_list = expression[0]
			
 
				+                    char_list = expression[1]
			
 
				+                    # print(f"expression_list:{expression_list}")
			
 
				+                    # print(f"char_list:{char_list}")
			
 
				+                    # 随机取一个表情
			
 
				+                    expression = random.choice(expression_list)
			
 
				+                    # print(f"expression:{expression}")
			
 
				+                    # 生成标题list[表情+title, title+表情]
			
 
				+                    expression_title_list = [expression + befor_video_title, befor_video_title + expression]
			
 
				+                    # print(f"expression_title_list:{expression_title_list}")
			
 
				+                    # 从标题list中随机取一个标题
			
 
				+                    title_list1 = random.choice(expression_title_list)
			
 
				+                    # print(f"title_list1:{title_list1}")
			
 
				+                    # 生成标题:原标题+符号
			
 
				+                    title_list2 = befor_video_title + random.choice(char_list)
			
 
				+                    # print(f"befor_video_title:{befor_video_title}")
			
 
				+
			
 
				+                    # title_list3 = befor_video_title.replace(
			
 
				+                    #     ",", random.choice(expression_list)).replace("，", random.choice(expression_list))
			
 
				+                    # 表情和标题组合，与标题和符号组合，汇总成待使用的标题列表
			
 
				+                    title_list4 = [title_list2, title_list1]
			
 
				+                    # print(f"title_list4:{title_list4}")
			
 
				+                    # 最终标题
			
 
				+                    video_title = random.choice(title_list4)
			
 
				+                    # print(f"video_title:{video_title}\n")
			
 
				+                else:
			
 
				+                    video_title = 0
			
 
				 
			
 
				-                    # 播放量
			
 
				-                    if "play_pv" in feeds[i]:
			
 
				-                        video_play_cnt = feeds[i]["play_pv"]
			
 
				-                    else:
			
 
				-                        video_play_cnt = 0
			
 
				+                # 视频 ID
			
 
				+                if "vid" in feeds[i]:
			
 
				+                    video_id = feeds[i]["vid"]
			
 
				+                else:
			
 
				+                    video_id = 0
			
 
				 
			
 
				-                    # 点赞量
			
 
				-                    if "favor" in feeds[i]:
			
 
				-                        video_like_cnt = feeds[i]["favor"]["total"]
			
 
				-                    else:
			
 
				-                        video_like_cnt = 0
			
 
				+                # 播放量
			
 
				+                if "play_pv" in feeds[i]:
			
 
				+                    video_play_cnt = feeds[i]["play_pv"]
			
 
				+                else:
			
 
				+                    video_play_cnt = 0
			
 
				 
			
 
				-                    # 分享量
			
 
				-                    if "share" in feeds[i]:
			
 
				-                        video_share_cnt = feeds[i]["share"]
			
 
				-                    else:
			
 
				-                        video_share_cnt = 0
			
 
				+                # 点赞量
			
 
				+                if "favor" in feeds[i]:
			
 
				+                    video_like_cnt = feeds[i]["favor"]["total"]
			
 
				+                else:
			
 
				+                    video_like_cnt = 0
			
 
				 
			
 
				-                    # # 评论量
			
 
				-                    # if "comment_count" in feeds[i]:
			
 
				-                    #     video_comment_cnt = feeds[i]["comment_count"]
			
 
				-                    # else:
			
 
				-                    #     video_comment_cnt = 0
			
 
				+                # 分享量
			
 
				+                if "share" in feeds[i]:
			
 
				+                    video_share_cnt = feeds[i]["share"]
			
 
				+                else:
			
 
				+                    video_share_cnt = 0
			
 
				 
			
 
				-                    # 时长
			
 
				-                    if "du" in feeds[i]:
			
 
				-                        video_duration = int(feeds[i]["du"] / 1000)
			
 
				-                    else:
			
 
				-                        video_duration = 0
			
 
				+                # # 评论量
			
 
				+                # if "comment_count" in feeds[i]:
			
 
				+                #     video_comment_cnt = feeds[i]["comment_count"]
			
 
				+                # else:
			
 
				+                #     video_comment_cnt = 0
			
 
				 
			
 
				-                    # 宽和高
			
 
				-                    if "w" or "h" in feeds[i]:
			
 
				-                        video_width = feeds[i]["w"]
			
 
				-                        video_height = feeds[i]["h"]
			
 
				-                    else:
			
 
				-                        video_width = 0
			
 
				-                        video_height = 0
			
 
				+                # 时长
			
 
				+                if "du" in feeds[i]:
			
 
				+                    video_duration = int(feeds[i]["du"] / 1000)
			
 
				+                else:
			
 
				+                    video_duration = 0
			
 
				 
			
 
				-                    # 发布时间
			
 
				-                    if "t" in feeds[i]:
			
 
				-                        video_send_time = feeds[i]["t"]
			
 
				-                    else:
			
 
				-                        video_send_time = 0
			
 
				-
			
 
				-                    # 用户名 / 头像
			
 
				-                    if "user" in feeds[i]:
			
 
				-                        user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
			
 
				-                            .replace("/", "").replace("快手", "").replace(" ", "") \
			
 
				-                            .replace(" ", "").replace("&NBSP", "").replace("\r", "")
			
 
				-                        head_url = feeds[i]["user"]["hurl"]
			
 
				-                    else:
			
 
				-                        user_name = 0
			
 
				-                        head_url = 0
			
 
				+                # 宽和高
			
 
				+                if "w" or "h" in feeds[i]:
			
 
				+                    video_width = feeds[i]["w"]
			
 
				+                    video_height = feeds[i]["h"]
			
 
				+                else:
			
 
				+                    video_width = 0
			
 
				+                    video_height = 0
			
 
				 
			
 
				-                    # 用户 ID
			
 
				-                    profile_id = feeds[i]["id"]
			
 
				+                # 发布时间
			
 
				+                if "t" in feeds[i]:
			
 
				+                    video_send_time = feeds[i]["t"]
			
 
				+                else:
			
 
				+                    video_send_time = 0
			
 
				+
			
 
				+                # 用户名 / 头像
			
 
				+                if "user" in feeds[i]:
			
 
				+                    user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
			
 
				+                        .replace("/", "").replace("快手", "").replace(" ", "") \
			
 
				+                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
			
 
				+                    head_url = feeds[i]["user"]["hurl"]
			
 
				+                else:
			
 
				+                    user_name = 0
			
 
				+                    head_url = 0
			
 
				 
			
 
				-                    # 用户 mid
			
 
				-                    profile_mid = feeds[i]["user"]["mid"]
			
 
				+                # 用户 ID
			
 
				+                profile_id = feeds[i]["id"]
			
 
				 
			
 
				-                    # 视频封面
			
 
				-                    if "url" in feeds[i]:
			
 
				-                        cover_url = feeds[i]["url"]
			
 
				-                    else:
			
 
				-                        cover_url = 0
			
 
				+                # 用户 mid
			
 
				+                profile_mid = feeds[i]["user"]["mid"]
			
 
				 
			
 
				-                    # 视频播放地址
			
 
				-                    if "v_url" in feeds[i]:
			
 
				-                        video_url = feeds[i]["v_url"]
			
 
				-                    else:
			
 
				-                        video_url = 0
			
 
				-
			
 
				-                    Common.logger(log_type).info("标题:{}", video_title)
			
 
				-                    Common.logger(log_type).info("视频ID:{}", video_id)
			
 
				-                    Common.logger(log_type).info("播放量:{}", video_play_cnt)
			
 
				-                    # Common.logger(log_type).info("点赞量:{}", video_like_cnt)
			
 
				-                    # Common.logger(log_type).info("分享量:{}", video_share_cnt)
			
 
				-                    # Common.logger(log_type).info("评论数:{}", video_comment_cnt)
			
 
				-                    Common.logger(log_type).info("时长:{}秒", video_duration)
			
 
				-                    # Common.logger(log_type).info("宽高:{}*{}", video_width, video_height)
			
 
				-                    Common.logger(log_type).info(
			
 
				-                        "视频发布时间:{}", time.strftime(
			
 
				-                            "%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
			
 
				-                    Common.logger(log_type).info("用户名:{}", user_name)
			
 
				-                    # Common.logger(log_type).info("用户头像:{}", head_url)
			
 
				-                    # Common.logger(log_type).info("封面:{}", cover_url)
			
 
				-                    Common.logger(log_type).info("播放地址:{}", video_url)
			
 
				-
			
 
				-                    # 过滤无效视频
			
 
				-                    if video_title == 0 or video_id == 0 or video_duration == 0 \
			
 
				-                            or video_send_time == 0 or user_name == 0 or head_url == 0 \
			
 
				-                            or cover_url == 0 or video_url == 0:
			
 
				-                        Common.logger(log_type).warning("无效视频")
			
 
				-
			
 
				-                    elif cls.download_rule(video_duration, video_width, video_height, video_play_cnt,
			
 
				-                                           video_like_cnt, video_share_cnt, video_send_time) is False:
			
 
				-                        Common.logger(log_type).info("不满足基础门槛规则")
			
 
				-
			
 
				-                    # 过滤敏感词
			
 
				-                    elif any(word if word in video_title else False for word in cls.sensitive_words(log_type)) is True:
			
 
				-                        Common.logger(log_type).info("视频已中敏感词:{}".format(video_title))
			
 
				-                        time.sleep(1)
			
 
				-
			
 
				-                    # 从云文档中去重:https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=yatRv2
			
 
				-                    elif video_id in [j for i in Feishu.get_values_batch(log_type, "xiaoniangao", "yatRv2") for j in i]:
			
 
				-                        Common.logger(log_type).info("该视频已下载:{}", video_title)
			
 
				-                        time.sleep(1)
			
 
				-
			
 
				-                    # 从云文档去重:https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=ba0da4
			
 
				-                    elif video_id in [j for i in Feishu.get_values_batch("hour", "xiaoniangao", "ba0da4") for j in i]:
			
 
				-                        Common.logger(log_type).info("该视频已保存过:{}", video_title)
			
 
				-                        time.sleep(1)
			
 
				-                    else:
			
 
				-                        Common.logger(log_type).info("该视频未下载，添加至feeds中:{}".format(video_title))
			
 
				-                        # feeds工作表，插入空行
			
 
				-                        time.sleep(1)
			
 
				-                        Feishu.insert_columns(log_type, "xiaoniangao", "ba0da4", "ROWS", 2, 3)
			
 
				-
			
 
				-                        # 获取当前时间
			
 
				-                        get_feeds_time = int(time.time())
			
 
				-                        # 看一看云文档，工作表中写入数据
			
 
				-                        values = [[profile_id,
			
 
				-                                   profile_mid,
			
 
				-                                   video_id,
			
 
				-                                   video_title,
			
 
				-                                   user_name,
			
 
				-                                   video_duration,
			
 
				-                                   cover_url,
			
 
				-                                   video_url,
			
 
				-                                   time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
			
 
				-                                   str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(get_feeds_time))),
			
 
				-                                   video_play_cnt]]
			
 
				-                        # 等待 1s，防止操作云文档太频繁，导致报错
			
 
				-                        time.sleep(1)
			
 
				-                        Feishu.update_values(log_type, "xiaoniangao", "ba0da4", "A3:K3", values)
			
 
				+                # 视频封面
			
 
				+                if "url" in feeds[i]:
			
 
				+                    cover_url = feeds[i]["url"]
			
 
				+                else:
			
 
				+                    cover_url = 0
			
 
				 
			
 
				-        except Exception as e:
			
 
				-            Common.logger(log_type).error("获取小时榜视频列表异常:{}", e)
			
 
				+                # 视频播放地址
			
 
				+                if "v_url" in feeds[i]:
			
 
				+                    video_url = feeds[i]["v_url"]
			
 
				+                else:
			
 
				+                    video_url = 0
			
 
				+
			
 
				+                Common.logger(log_type).info("标题:{}", video_title)
			
 
				+                Common.logger(log_type).info("视频ID:{}", video_id)
			
 
				+                Common.logger(log_type).info("播放量:{}", video_play_cnt)
			
 
				+                # Common.logger(log_type).info("点赞量:{}", video_like_cnt)
			
 
				+                # Common.logger(log_type).info("分享量:{}", video_share_cnt)
			
 
				+                # Common.logger(log_type).info("评论数:{}", video_comment_cnt)
			
 
				+                Common.logger(log_type).info("时长:{}秒", video_duration)
			
 
				+                # Common.logger(log_type).info("宽高:{}*{}", video_width, video_height)
			
 
				+                Common.logger(log_type).info(
			
 
				+                    "视频发布时间:{}", time.strftime(
			
 
				+                        "%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
			
 
				+                Common.logger(log_type).info("用户名:{}", user_name)
			
 
				+                # Common.logger(log_type).info("用户头像:{}", head_url)
			
 
				+                # Common.logger(log_type).info("封面:{}", cover_url)
			
 
				+                Common.logger(log_type).info("播放地址:{}", video_url)
			
 
				+
			
 
				+                # 过滤无效视频
			
 
				+                if video_title == 0 or video_id == 0 or video_duration == 0 \
			
 
				+                        or video_send_time == 0 or user_name == 0 or head_url == 0 \
			
 
				+                        or cover_url == 0 or video_url == 0:
			
 
				+                    Common.logger(log_type).warning("无效视频")
			
 
				+
			
 
				+                elif cls.download_rule(video_duration, video_width, video_height, video_play_cnt,
			
 
				+                                       video_like_cnt, video_share_cnt, video_send_time) is False:
			
 
				+                    Common.logger(log_type).info("不满足基础门槛规则")
			
 
				+
			
 
				+                # 过滤敏感词
			
 
				+                elif any(word if word in video_title else False for word in cls.sensitive_words(log_type)) is True:
			
 
				+                    Common.logger(log_type).info("视频已中敏感词:{}".format(video_title))
			
 
				+                    time.sleep(1)
			
 
				+
			
 
				+                # 从云文档中去重:https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=yatRv2
			
 
				+                elif video_id in [j for i in Feishu.get_values_batch(log_type, "xiaoniangao", "yatRv2") for j in i]:
			
 
				+                    Common.logger(log_type).info("该视频已下载:{}", video_title)
			
 
				+                    time.sleep(1)
			
 
				+
			
 
				+                # 从云文档去重:https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=ba0da4
			
 
				+                elif video_id in [j for i in Feishu.get_values_batch("hour", "xiaoniangao", "ba0da4") for j in i]:
			
 
				+                    Common.logger(log_type).info("该视频已保存过:{}", video_title)
			
 
				+                    time.sleep(1)
			
 
				+                else:
			
 
				+                    Common.logger(log_type).info("该视频未下载，添加至feeds中:{}".format(video_title))
			
 
				+                    # feeds工作表，插入空行
			
 
				+                    time.sleep(1)
			
 
				+                    Feishu.insert_columns(log_type, "xiaoniangao", "ba0da4", "ROWS", 2, 3)
			
 
				+
			
 
				+                    # 获取当前时间
			
 
				+                    get_feeds_time = int(time.time())
			
 
				+                    # 看一看云文档，工作表中写入数据
			
 
				+                    values = [[profile_id,
			
 
				+                               profile_mid,
			
 
				+                               video_id,
			
 
				+                               video_title,
			
 
				+                               user_name,
			
 
				+                               video_duration,
			
 
				+                               cover_url,
			
 
				+                               video_url,
			
 
				+                               time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
			
 
				+                               str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(get_feeds_time))),
			
 
				+                               video_play_cnt]]
			
 
				+                    # 等待 1s，防止操作云文档太频繁，导致报错
			
 
				+                    time.sleep(1)
			
 
				+                    Feishu.update_values(log_type, "xiaoniangao", "ba0da4", "A3:K3", values)
			
 
				+
			
 
				+        # except Exception as e:
			
 
				+        #     Common.logger(log_type).error("获取小时榜视频列表异常:{}", e)
			
 
				 
			
 
				     # 更新小时榜数据
			
 
				     @classmethod
			
@@ -1558,12 +1580,5 @@ class HourList:
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				-    hour_list = HourList()
			
 
				-    # hour_list.get_hour_list_feeds()
			
 
				-    # hour_list.del_null_rows("xiaoniangao", "ba0da4", 3)
			
 
				-    # hour_list.update_hour_list_data(
			
 
				-    #     today=datetime.datetime.now().strftime("%Y-%m-%d"),
			
 
				-    #     yesterday=(datetime.date.today() + datetime.timedelta(days=-1)).strftime("%Y-%m-%d"),
			
 
				-    #     before_yesterday=(datetime.date.today() + datetime.timedelta(days=-2)).strftime("%Y-%m-%d"))
			
 
				-    # hour_list.download_and_publish("hour")
			
 
				-    hour_list.update_hour_list_data("hour", "2022-07-01", "2022-06-30", "2022-06-29")
			
 
				+    HourList.get_hour_list_feeds("hour")
			
 
				+    # HourList.get_expression()