wangkun 2 vuotta sitten
vanhempi
commit
edfe8d2fb2
1 muutettua tiedostoa jossa 209 lisäystä ja 208 poistoa
  1. 209 208
      main/weishi_recommend.py

+ 209 - 208
main/weishi_recommend.py

@@ -90,26 +90,27 @@ class Recommend:
         3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=O7fCzr 中去重
         4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=O7fCzr
         """
-        url = "https://api.weishi.qq.com/trpc.weishi.weishi_h5_proxy.weishi_h5_proxy/WxminiGetFeedList"
-        headers = {
-            "content-type": "application/json",
-            "Accept-Encoding": "gzip,compress,br,deflate",
-            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
-                          " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"
-                          " MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN",
-            "Referer": str(cls.Referer)
-        }
-        cookies = {
-            "wesee_authtype": "3",
-            "wesee_openid": str(cls.wesee_openid),
-            "wesee_openkey": str(cls.wesee_openkey),
-            "wesee_personid": str(cls.wesee_personid),
-            "wesee_refresh_token": "",
-            "wesee_access_token": str(cls.wesee_access_token),
-            "wesee_thr_appid": str(cls.wesee_thr_appid),
-            "wesee_ichid": "8"
-        }
-        json_data = {
+        try:
+            url = "https://api.weishi.qq.com/trpc.weishi.weishi_h5_proxy.weishi_h5_proxy/WxminiGetFeedList"
+            headers = {
+                "content-type": "application/json",
+                "Accept-Encoding": "gzip,compress,br,deflate",
+                "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
+                              " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"
+                              " MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN",
+                "Referer": str(cls.Referer)
+            }
+            cookies = {
+                "wesee_authtype": "3",
+                "wesee_openid": str(cls.wesee_openid),
+                "wesee_openkey": str(cls.wesee_openkey),
+                "wesee_personid": str(cls.wesee_personid),
+                "wesee_refresh_token": "",
+                "wesee_access_token": str(cls.wesee_access_token),
+                "wesee_thr_appid": str(cls.wesee_thr_appid),
+                "wesee_ichid": "8"
+            }
+            json_data = {
             "req_body": {
                 "requestType": 16,
                 "isrefresh": 1,
@@ -125,194 +126,194 @@ class Recommend:
                 "mapExt": "{\"imageSize\":\"480\",\"adaptScene\":\"PicHDWebpLimitScene\"}"
             }
         }
-        try:
-            while True:
-                urllib3.disable_warnings()
-                r = requests.post(headers=headers, url=url, cookies=cookies, json=json_data, proxies=proxies,
-                                  verify=False)
-                response = json.loads(r.content.decode("utf8"))
-                feeds = response["rsp_body"]["feeds"]
-                for i in range(len(feeds)):
-                    # 视频标题过滤话题及处理特殊字符
-                    weishi_title = feeds[i]["desc"]
-                    title_split1 = weishi_title.split(" #")
-                    if title_split1[0] != "":
-                        title1 = title_split1[0]
-                    else:
-                        title1 = title_split1[-1]
-
-                    title_split2 = title1.split(" #")
-                    if title_split2[0] != "":
-                        title2 = title_split2[0]
-                    else:
-                        title2 = title_split2[-1]
-
-                    title_split3 = title2.split("@")
-                    if title_split3[0] != "":
-                        title3 = title_split3[0]
-                    else:
-                        title3 = title_split3[-1]
-                    # 视频标题
-                    video_title = title3.strip().replace("\n", "").replace("/", "")\
-                        .replace("快手", "").replace(" ", "").replace(" ", "").replace("&NBSP", "")\
-                        .replace("\r", "").replace("#", "").replace(".", "。").replace("\\", "").replace(":", "")\
-                        .replace("*", "").replace("?", "").replace("?", "").replace('"', "").replace("<", "")\
-                        .replace(">", "").replace("|", "").replace("微视", "")[:40]
-
-                    # 视频 ID
-                    if "id" not in feeds[i]["video"]:
-                        video_id = 0
-                    else:
-                        video_id = feeds[i]["video"]["id"]
-
-                    # 播放数
-                    if "playNum" not in feeds[i]["ugcData"]:
-                        video_play_cnt = 0
-                    else:
-                        video_play_cnt = feeds[i]["ugcData"]["playNum"]
-
-                    # 点赞数
-                    if "dingCount" not in feeds[i]["ugcData"]:
-                        video_like_cnt = 0
-                    else:
-                        video_like_cnt = feeds[i]["ugcData"]["dingCount"]
-
-                    # 分享数
-                    if "shareNum" not in feeds[i]["ugcData"]:
-                        video_share_cnt = 0
-                    else:
-                        video_share_cnt = feeds[i]["ugcData"]["shareNum"]
-
-                    # 评论数
-                    if "totalCommentNum" not in feeds[i]["ugcData"]:
-                        video_comment_cnt = 0
-                    else:
-                        video_comment_cnt = feeds[i]["ugcData"]["totalCommentNum"]
-
-                    # 视频时长
-                    if "duration" not in feeds[i]["video"]:
-                        video_duration = 0
-                    else:
-                        video_duration = int(int(feeds[i]["video"]["duration"]) / 1000)
-
-                    # 视频宽高
-                    if "width" not in feeds[i]["video"] or "height" not in feeds[i]["video"]:
-                        video_width = 0
-                        video_height = 0
-                        video_resolution = str(video_width) + "*" + str(video_height)
-                    else:
-                        video_width = feeds[i]["video"]["width"]
-                        video_height = feeds[i]["video"]["height"]
-                        video_resolution = str(video_width) + "*" + str(video_height)
-
-                    # 视频发布时间
-                    if "createTime" not in feeds[i]:
-                        video_send_time = 0
-                    else:
-                        video_send_time = int(feeds[i]["createTime"]) * 1000
-
-                    # 用户昵称
-                    user_name = feeds[i]["poster"]["nick"].strip().replace("\n", "") \
-                        .replace("/", "").replace("快手", "").replace(" ", "") \
-                        .replace(" ", "").replace("&NBSP", "").replace("\r", "").replace("微视", "")
-
-                    # 用户 ID
-                    user_id = feeds[i]["poster"]["id"]
-
-                    # 用户头像地址
-                    if "thumbURL" not in feeds[i]["material"] and "avatar" not in feeds[i]["poster"]:
-                        head_url = 0
-                    elif "thumbURL" in feeds[i]["material"]:
-                        head_url = feeds[i]["material"]["thumbURL"]
-                    else:
-                        head_url = feeds[i]["poster"]["avatar"]
-
-                    # 视频封面地址
-                    if len(feeds[i]["images"]) == 0:
-                        cover_url = 0
-                    else:
-                        cover_url = feeds[i]["images"][0]["url"]
-
-                    # 视频播放地址
-                    if "url" not in feeds[i]["video"]:
-                        video_url = 0
-                    else:
-                        video_url = feeds[i]["video"]["url"]
-
-                    Common.logger(log_type).info("video_title:{}".format(video_title))
-                    Common.logger(log_type).info("video_id:{}".format(video_id))
-                    Common.logger(log_type).info("video_like_cnt:{}".format(video_like_cnt))
-                    Common.logger(log_type).info("video_share_cnt:{}".format(video_share_cnt))
-                    Common.logger(log_type).info("video_comment_cnt:{}".format(video_comment_cnt))
-                    Common.logger(log_type).info("video_duration:{}秒".format(video_duration))
-                    Common.logger(log_type).info(
-                        "video_send_time:{}".format(time.strftime(
-                            "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
-                    Common.logger(log_type).info("user_name:{}".format(user_name))
-                    Common.logger(log_type).info("video_url:{}".format(video_url))
-                    # Common.logger(log_type).info("video_play_cnt:{}".format(video_play_cnt))
-                    # Common.logger(log_type).info("video_resolution:{}".format(video_resolution))
-                    # Common.logger(log_type).info("user_id:{}".format(user_id))
-                    # Common.logger(log_type).info("head_url:{}".format(head_url))
-                    # Common.logger(log_type).info("cover_url:{}".format(cover_url))
-
-                    # 过滤无效视频
-                    if video_id == 0 or video_duration == 0 or video_send_time == 0 or head_url == 0 \
-                            or cover_url == 0 or video_url == 0:
-                        Common.logger(log_type).info("无效视频\n")
-                    # 判断基础规则
-                    elif cls.download_rule(video_duration, video_width, video_height, video_like_cnt) is False:
-                        Common.logger(log_type).info("不满足基础规则\n")
-                    # 标题敏感词过滤
-                    elif any(word if word in weishi_title else False for word in
-                             cls.video_title_sensitive_words(log_type)) is True:
-                        Common.logger(log_type).info("标题已中敏感词:{}\n".format(weishi_title))
-                    # 用户名敏感词过滤
-                    elif any(word if word in user_name else False for word in
-                             cls.username_sensitive_words(log_type)) is True:
-                        Common.logger(log_type).info("用户名已中敏感词:{}\n".format(user_name))
-                    # 从已下载云文档去重
-                    elif str(video_id) in [j for m in Feishu.get_values_batch(log_type, 'weishi', "caa3fa") for j in m]:
-                        Common.logger(log_type).info("视频已下载:{}\n", video_title)
-                    # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=O7fCzr
-                    elif str(video_id) in [j for n in Feishu.get_values_batch(log_type, 'weishi', "O7fCzr") for j in n]:
-                        Common.logger(log_type).info("视频已存在:{}\n", video_title)
-                    else:
-                        # 添加到已下载视频列表
-                        cls.video_count.append(video_id)
-
-                        # feeds工作表,插入首行
-                        Feishu.insert_columns(log_type, 'weishi', "O7fCzr", "ROWS", 1, 2)
-                        # 获取当前时间
-                        get_feeds_time = int(time.time())
-                        # 工作表 feeds 中写入数据
-                        values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(get_feeds_time))),
-                                   "推荐榜",
-                                   str(video_id),
-                                   video_title,
-                                   int(video_play_cnt),
-                                   int(video_comment_cnt),
-                                   int(video_like_cnt),
-                                   int(video_share_cnt),
-                                   video_duration,
-                                   video_resolution,
-                                   time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time / 1000))),
-                                   user_name,
-                                   user_id,
-                                   head_url,
-                                   cover_url,
-                                   video_url]]
-                        # 等待 1s,防止操作云文档太频繁,导致报错
-                        time.sleep(1)
-                        Feishu.update_values(log_type, 'weishi', "O7fCzr", "A2:T2", values)
-                        Common.logger(log_type).info("视频保存至云文档成功\n")
-                        time.sleep(random.randint(3, 5))
-
-                        # 每天抓取 50 条
-                        if len(cls.video_count) >= cls.crawler_count:
-                            Common.logger(log_type).info("已抓取{}条数据\n", len(cls.video_count))
-                            cls.video_count = []
-                            return
+
+            # while True:
+            urllib3.disable_warnings()
+            r = requests.post(headers=headers, url=url, cookies=cookies, json=json_data, proxies=proxies,
+                              verify=False)
+            response = json.loads(r.content.decode("utf8"))
+            feeds = response["rsp_body"]["feeds"]
+            for i in range(len(feeds)):
+                # 视频标题过滤话题及处理特殊字符
+                weishi_title = feeds[i]["desc"]
+                title_split1 = weishi_title.split(" #")
+                if title_split1[0] != "":
+                    title1 = title_split1[0]
+                else:
+                    title1 = title_split1[-1]
+
+                title_split2 = title1.split(" #")
+                if title_split2[0] != "":
+                    title2 = title_split2[0]
+                else:
+                    title2 = title_split2[-1]
+
+                title_split3 = title2.split("@")
+                if title_split3[0] != "":
+                    title3 = title_split3[0]
+                else:
+                    title3 = title_split3[-1]
+                # 视频标题
+                video_title = title3.strip().replace("\n", "").replace("/", "")\
+                    .replace("快手", "").replace(" ", "").replace(" ", "").replace("&NBSP", "")\
+                    .replace("\r", "").replace("#", "").replace(".", "。").replace("\\", "").replace(":", "")\
+                    .replace("*", "").replace("?", "").replace("?", "").replace('"', "").replace("<", "")\
+                    .replace(">", "").replace("|", "").replace("微视", "")[:40]
+
+                # 视频 ID
+                if "id" not in feeds[i]["video"]:
+                    video_id = 0
+                else:
+                    video_id = feeds[i]["video"]["id"]
+
+                # 播放数
+                if "playNum" not in feeds[i]["ugcData"]:
+                    video_play_cnt = 0
+                else:
+                    video_play_cnt = feeds[i]["ugcData"]["playNum"]
+
+                # 点赞数
+                if "dingCount" not in feeds[i]["ugcData"]:
+                    video_like_cnt = 0
+                else:
+                    video_like_cnt = feeds[i]["ugcData"]["dingCount"]
+
+                # 分享数
+                if "shareNum" not in feeds[i]["ugcData"]:
+                    video_share_cnt = 0
+                else:
+                    video_share_cnt = feeds[i]["ugcData"]["shareNum"]
+
+                # 评论数
+                if "totalCommentNum" not in feeds[i]["ugcData"]:
+                    video_comment_cnt = 0
+                else:
+                    video_comment_cnt = feeds[i]["ugcData"]["totalCommentNum"]
+
+                # 视频时长
+                if "duration" not in feeds[i]["video"]:
+                    video_duration = 0
+                else:
+                    video_duration = int(int(feeds[i]["video"]["duration"]) / 1000)
+
+                # 视频宽高
+                if "width" not in feeds[i]["video"] or "height" not in feeds[i]["video"]:
+                    video_width = 0
+                    video_height = 0
+                    video_resolution = str(video_width) + "*" + str(video_height)
+                else:
+                    video_width = feeds[i]["video"]["width"]
+                    video_height = feeds[i]["video"]["height"]
+                    video_resolution = str(video_width) + "*" + str(video_height)
+
+                # 视频发布时间
+                if "createTime" not in feeds[i]:
+                    video_send_time = 0
+                else:
+                    video_send_time = int(feeds[i]["createTime"]) * 1000
+
+                # 用户昵称
+                user_name = feeds[i]["poster"]["nick"].strip().replace("\n", "") \
+                    .replace("/", "").replace("快手", "").replace(" ", "") \
+                    .replace(" ", "").replace("&NBSP", "").replace("\r", "").replace("微视", "")
+
+                # 用户 ID
+                user_id = feeds[i]["poster"]["id"]
+
+                # 用户头像地址
+                if "thumbURL" not in feeds[i]["material"] and "avatar" not in feeds[i]["poster"]:
+                    head_url = 0
+                elif "thumbURL" in feeds[i]["material"]:
+                    head_url = feeds[i]["material"]["thumbURL"]
+                else:
+                    head_url = feeds[i]["poster"]["avatar"]
+
+                # 视频封面地址
+                if len(feeds[i]["images"]) == 0:
+                    cover_url = 0
+                else:
+                    cover_url = feeds[i]["images"][0]["url"]
+
+                # 视频播放地址
+                if "url" not in feeds[i]["video"]:
+                    video_url = 0
+                else:
+                    video_url = feeds[i]["video"]["url"]
+
+                Common.logger(log_type).info("video_title:{}".format(video_title))
+                Common.logger(log_type).info("video_id:{}".format(video_id))
+                Common.logger(log_type).info("video_like_cnt:{}".format(video_like_cnt))
+                Common.logger(log_type).info("video_share_cnt:{}".format(video_share_cnt))
+                Common.logger(log_type).info("video_comment_cnt:{}".format(video_comment_cnt))
+                Common.logger(log_type).info("video_duration:{}秒".format(video_duration))
+                Common.logger(log_type).info(
+                    "video_send_time:{}".format(time.strftime(
+                        "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
+                Common.logger(log_type).info("user_name:{}".format(user_name))
+                Common.logger(log_type).info("video_url:{}".format(video_url))
+                # Common.logger(log_type).info("video_play_cnt:{}".format(video_play_cnt))
+                # Common.logger(log_type).info("video_resolution:{}".format(video_resolution))
+                # Common.logger(log_type).info("user_id:{}".format(user_id))
+                # Common.logger(log_type).info("head_url:{}".format(head_url))
+                # Common.logger(log_type).info("cover_url:{}".format(cover_url))
+
+                # 过滤无效视频
+                if video_id == 0 or video_duration == 0 or video_send_time == 0 or head_url == 0 \
+                        or cover_url == 0 or video_url == 0:
+                    Common.logger(log_type).info("无效视频\n")
+                # 判断基础规则
+                elif cls.download_rule(video_duration, video_width, video_height, video_like_cnt) is False:
+                    Common.logger(log_type).info("不满足基础规则\n")
+                # 标题敏感词过滤
+                elif any(word if word in weishi_title else False for word in
+                         cls.video_title_sensitive_words(log_type)) is True:
+                    Common.logger(log_type).info("标题已中敏感词:{}\n".format(weishi_title))
+                # 用户名敏感词过滤
+                elif any(word if word in user_name else False for word in
+                         cls.username_sensitive_words(log_type)) is True:
+                    Common.logger(log_type).info("用户名已中敏感词:{}\n".format(user_name))
+                # 从已下载云文档去重
+                elif str(video_id) in [j for m in Feishu.get_values_batch(log_type, 'weishi', "caa3fa") for j in m]:
+                    Common.logger(log_type).info("视频已下载:{}\n", video_title)
+                # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=O7fCzr
+                elif str(video_id) in [j for n in Feishu.get_values_batch(log_type, 'weishi', "O7fCzr") for j in n]:
+                    Common.logger(log_type).info("视频已存在:{}\n", video_title)
+                else:
+                    # 添加到已下载视频列表
+                    cls.video_count.append(video_id)
+
+                    # feeds工作表,插入首行
+                    Feishu.insert_columns(log_type, 'weishi', "O7fCzr", "ROWS", 1, 2)
+                    # 获取当前时间
+                    get_feeds_time = int(time.time())
+                    # 工作表 feeds 中写入数据
+                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(get_feeds_time))),
+                               "推荐榜",
+                               str(video_id),
+                               video_title,
+                               int(video_play_cnt),
+                               int(video_comment_cnt),
+                               int(video_like_cnt),
+                               int(video_share_cnt),
+                               video_duration,
+                               video_resolution,
+                               time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time / 1000))),
+                               user_name,
+                               user_id,
+                               head_url,
+                               cover_url,
+                               video_url]]
+                    # 等待 1s,防止操作云文档太频繁,导致报错
+                    time.sleep(1)
+                    Feishu.update_values(log_type, 'weishi', "O7fCzr", "A2:T2", values)
+                    Common.logger(log_type).info("视频保存至云文档成功\n")
+                    time.sleep(random.randint(3, 5))
+
+                    # # 每天抓取 50 条
+                    # if len(cls.video_count) >= cls.crawler_count:
+                    #     Common.logger(log_type).info("已抓取{}条数据\n", len(cls.video_count))
+                    #     cls.video_count = []
+                    #     return
         except Exception as e:
             Common.logger(log_type).error("get_feeds异常:{}\n".format(e))