wangkun 2 سال پیش
والد
کامیت
58fb77fdee
2 فایلهای تغییر یافته به همراه 0 افزوده شده و 520 حذف شده
  1. 0 52
      main/run_follow_list.py
  2. 0 468
      main/xiaoniangao_follow_list.py

+ 0 - 52
main/run_follow_list.py

@@ -1,52 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/5/18
-import datetime
-from datetime import timedelta
-import os
-# import random
-import sys
-import time
-
-sys.path.append(os.getcwd())
-from main.common import Common
-from main.xiaoniangao_follow_list import Follow
-
-
-def follow_job():
-    while True:
-        Common.del_logs("follow")
-        Common.logger("follow").info("开始抓取小年糕关注榜")
-        # 关注用户列表
-        Follow.sub_users("follow")
-        time.sleep(1)
-        # 取消关注用户
-        Follow.unsub_users("follow")
-        time.sleep(1)
-
-        while True:
-            # 前天 <class 'str'>  2022-04-13
-            before_yesterday = (datetime.date.today() + timedelta(days=-2)).strftime("%Y/%m/%d %H:%M:%S")
-            before_yesterday = time.strptime(before_yesterday, "%Y/%m/%d %H:%M:%S")
-            before_yesterday = int(time.mktime(before_yesterday))*1000
-            # 任务结束时间:小于 2022年5月18日
-            # endtime = 1652803200000
-            endtime = before_yesterday
-            follow_job_time = datetime.datetime.now()
-
-            if Follow.download_from_sub("follow", endtime) >= endtime:
-                Follow.download_from_sub("follow", endtime)
-            elif 1 >= follow_job_time.hour >= 0:
-                Common.logger("follow").info("结束今日抓取任务")
-                Follow.next_t_list = [-1]
-                time.sleep(300)
-                break
-            else:
-                Common.logger("follow").info("发布时间大于48小时,结束抓取任务")
-                time.sleep(3)
-                Follow.next_t_list = [-1]
-                break
-
-
-if __name__ == "__main__":
-    follow_job()

+ 0 - 468
main/xiaoniangao_follow_list.py

@@ -1,468 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/5/18
-import time
-import requests
-import urllib3
-from main.common import Common
-from main.feishu_lib import Feishu
-from main.publish import Publish
-
-proxies = {"http": None, "https": None}
-
-
-class Follow:
-    # 翻页初始值
-    next_t_list = [-1]
-
-    # follow_x_b3_traceid = Feishu.get_range_value("follow", "xiaoniangao", "dzcWHw", "C3:C3")[0]
-    # follow_x_token_id = Feishu.get_range_value("follow", "xiaoniangao", "dzcWHw", "C4:C4")[0]
-    # follow_referer = Feishu.get_range_value("follow", "xiaoniangao", "dzcWHw", "C5:C5")[0][0]["link"]
-    # follow_uid = Feishu.get_range_value("follow", "xiaoniangao", "dzcWHw", "C6:C6")[0]
-    # follow_token = Feishu.get_range_value("follow", "xiaoniangao", "dzcWHw", "C7:C7")[0]
-    wechat_sheet = Feishu.get_values_batch("follow", "xiaoniangao", "dzcWHw")
-    follow_x_b3_traceid = wechat_sheet[2][2]
-    follow_x_token_id = wechat_sheet[3][2]
-    follow_referer = wechat_sheet[4][2]
-    follow_uid = wechat_sheet[5][2]
-    follow_token = wechat_sheet[6][2]
-
-    # 过滤敏感词
-    @classmethod
-    def sensitive_words(cls, log_type):
-        # 敏感词库列表
-        word_list = []
-        # 从云文档读取所有敏感词,添加到词库列表
-        lists = Feishu.get_values_batch(log_type, "xiaoniangao", "DRAnZh")
-        for i in lists:
-            for j in i:
-                # 过滤空的单元格内容
-                if j is None:
-                    pass
-                else:
-                    word_list.append(j)
-        return word_list
-
-    # 基础门槛规则
-    @staticmethod
-    def download_rule(d_duration, d_width, d_height, d_play_cnt, d_like_cnt, d_share_cnt, d_send_time):
-        """
-        下载视频的基本规则
-        :param d_duration: 时长
-        :param d_width: 宽
-        :param d_height: 高
-        :param d_play_cnt: 播放量
-        :param d_like_cnt: 点赞量
-        :param d_share_cnt: 分享量
-        :param d_send_time: 发布时间
-        :return: 满足规则,返回 True;反之,返回 False
-        """
-        # 视频时长
-        if int(float(d_duration)) >= 40:
-            # 宽或高
-            if int(d_width) >= 0 or int(d_height) >= 0:
-                # 播放量
-                if int(d_play_cnt) >= 5000:
-                    # 点赞量
-                    if int(d_like_cnt) >= 0:
-                        # 分享量
-                        if int(d_share_cnt) >= 0:
-                            # 发布时间 <= 48 小时
-                            if int(time.time()) - int(d_send_time) / 1000 <= 172800:
-                                return True
-                            else:
-                                return False
-                        else:
-                            return False
-                    else:
-                        return False
-                else:
-                    return False
-            return False
-        return False
-
-    # 获取关注用户列表
-    @classmethod
-    def get_follow_users(cls, log_type):
-        try:
-            if len(Feishu.get_values_batch(log_type, "xiaoniangao", "oNpThi")) == 1:
-                Common.logger(log_type).info("暂无定向爬取账号")
-            else:
-                follow_list = []
-                nick_list = []
-                for i in range(2, len(Feishu.get_values_batch(log_type, "xiaoniangao", "oNpThi")) + 1):
-                    time.sleep(0.5)
-                    profile_mid = Feishu.get_range_value(
-                        log_type, "xiaoniangao", "oNpThi", "B" + str(i) + ":" + "B" + str(i))[0]
-                    time.sleep(0.5)
-                    nick = \
-                        Feishu.get_range_value(log_type, "xiaoniangao", "oNpThi",
-                                               "C" + str(i) + ":" + "C" + str(i))[0]
-                    nick_list.append(nick)
-                    follow_list.append(profile_mid)
-
-                Common.logger(log_type).info("已获取用户列表:{}", nick_list)
-                return follow_list
-
-        except Exception as e:
-            Common.logger(log_type).error("获取用户列表异常:{}", e)
-
-    # 获取取消关注用户列表
-    @classmethod
-    def get_unfollow_users(cls, log_type):
-        try:
-            if len(Feishu.get_values_batch(log_type, "xiaoniangao", "tuMNhn")) == 1:
-                Common.logger(log_type).info("暂无定向账号")
-            else:
-                unfollow_list = []
-                nick_list = []
-                for i in range(2, len(Feishu.get_values_batch(log_type, "xiaoniangao", "tuMNhn")) + 1):
-                    time.sleep(0.5)
-                    profile_mid = Feishu.get_range_value(
-                        log_type, "xiaoniangao", "tuMNhn", "B" + str(i) + ":" + "B" + str(i))[0]
-                    time.sleep(0.5)
-                    nick = \
-                        Feishu.get_range_value(log_type, "xiaoniangao", "tuMNhn",
-                                               "C" + str(i) + ":" + "C" + str(i))[0]
-                    nick_list.append(nick)
-                    unfollow_list.append(profile_mid)
-
-                Common.logger(log_type).info("取消关注用户列表:{}", nick_list)
-                return unfollow_list
-
-        except Exception as e:
-            Common.logger(log_type).error("获取用户列表异常:{}", e)
-
-    # 关注列表中的用户
-    @classmethod
-    def sub_users(cls, log_type):
-        profile_mids = cls.get_follow_users(log_type)
-        for profile_mid in profile_mids:
-            url = "https://api.xiaoniangao.cn/V1/account/sub_user"
-            headers = {
-                "x-b3-traceid": cls.follow_x_b3_traceid,
-                "X-Token-Id": cls.follow_x_token_id,
-                "content-type": "application/json",
-                "uuid": cls.follow_uid,
-                "Accept-Encoding": "gzip,compress,br,deflate",
-                "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
-                              " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
-                              "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
-                "Referer": cls.follow_referer
-            }
-            data = {
-                "visited_mid": int(profile_mid),
-                "log_common_params": {
-                    "e": [{
-                        "data": {
-                            "page": "profilePage",
-                            "topic": "public",
-                            "type": "follow",
-                            "name": "user",
-                            "smid": str(profile_mid)
-                        },
-                        "ab": {}
-                    }],
-                    "ext": {
-                        "brand": "iPhone",
-                        "device": "iPhone 11",
-                        "os": "iOS 14.7.1",
-                        "weixinver": "8.0.20",
-                        "srcver": "2.24.2",
-                        "net": "wifi",
-                        "scene": "1089"
-                    },
-                    "pj": "1",
-                    "pf": "2",
-                    "session_id": "d53b6125-942b-4ec1-8d22-f9451a35e9f9"
-                },
-                "token": cls.follow_token,
-                "uid": cls.follow_uid,
-                "proj": "ma",
-                "wx_ver": "8.0.20",
-                "code_ver": "3.62.0"
-            }
-            try:
-                urllib3.disable_warnings()
-                r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
-                Common.logger(log_type).info("关注用户:{},{}", profile_mid, r)
-            except Exception as e:
-                Common.logger(log_type).error("关注用户异常:{}", e)
-
-    # 取消关注
-    @classmethod
-    def unsub_users(cls, log_type):
-        unsub_profile_mids = cls.get_unfollow_users(log_type)
-        for profile_mid in unsub_profile_mids:
-            url = "https://api.xiaoniangao.cn/V1/account/unsub_user"
-            headers = {
-                "x-b3-traceid": cls.follow_x_b3_traceid,
-                "X-Token-Id": cls.follow_x_token_id,
-                "content-type": "application/json",
-                "uuid": cls.follow_uid,
-                "Accept-Encoding": "gzip,compress,br,deflate",
-                "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
-                              " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
-                              "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
-                "Referer": cls.follow_referer
-            }
-            data = {
-                "visited_mid": int(profile_mid),
-                "log_common_params": {
-                    "e": [{
-                        "data": {
-                            "page": "profilePage",
-                            "topic": "public",
-                            "type": "unfollow",
-                            "name": "user",
-                            "smid": str(profile_mid)
-                        },
-                        "ab": {}
-                    }],
-                    "ext": {
-                        "brand": "iPhone",
-                        "device": "iPhone 11",
-                        "os": "iOS 14.7.1",
-                        "weixinver": "8.0.20",
-                        "srcver": "2.24.4",
-                        "net": "wifi",
-                        "scene": "1089"
-                    },
-                    "pj": "1",
-                    "pf": "2",
-                    "session_id": "6a2959c7-3f98-411f-8bc9-8d2a8a5c6f16"
-                },
-                "token": cls.follow_token,
-                "uid": cls.follow_uid,
-                "proj": "ma",
-                "wx_ver": "8.0.20",
-                "code_ver": "3.64.1"}
-            try:
-                urllib3.disable_warnings()
-                r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
-                Common.logger(log_type).info("取消关注:{},{}", profile_mid, r)
-            except Exception as e:
-                Common.logger(log_type).error("取消关注异常:{}", e)
-
-    # 从关注列表获取视频,并下载符合规则的视频,再进行上传
-    @classmethod
-    def download_from_sub(cls, log_type, endtime):
-        url = "https://api.xiaoniangao.cn/album/get_user_trends"
-        headers = {
-            "x-b3-traceid": cls.follow_x_b3_traceid,
-            "X-Token-Id": cls.follow_x_token_id,
-            "content-type": "application/json",
-            "uuid": cls.follow_uid,
-            "Accept-Encoding": "gzip,compress,br,deflate",
-            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
-                          " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
-                          "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
-            "Referer": cls.follow_referer
-        }
-        data = {
-            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
-            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
-            "start_t": int(cls.next_t_list[-1]),
-            "limit": 5,
-            "share_width": 625,
-            "share_height": 500,
-            "token": cls.follow_token,
-            "uid": cls.follow_uid,
-            "proj": "ma",
-            "wx_ver": "8.0.20",
-            "code_ver": "3.62.0",
-            "log_common_params": {
-                "e": [{
-                    "data": {
-                        "page": "discoverIndexPage",
-                        "topic": "follow"
-                    }
-                }],
-                "ext": {
-                    "brand": "iPhone",
-                    "device": "iPhone 11",
-                    "os": "iOS 14.7.1",
-                    "weixinver": "8.0.20",
-                    "srcver": "2.24.2",
-                    "net": "wifi",
-                    "scene": "1089"
-                },
-                "pj": "1",
-                "pf": "2",
-                "session_id": "18da9157-5aa6-4955-a849-9160f07ee912"
-            }
-        }
-        try:
-            urllib3.disable_warnings()
-            r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
-            next_t = r.json()["data"]["next_t"]
-            cls.next_t_list.append(next_t)
-            feeds = r.json()["data"]["list"]
-            for i in range(len(feeds)):
-                # 标题
-                video_title = feeds[i]["title"].strip().replace("\n", "") \
-                    .replace("/", "").replace("\r", "").replace("#", "") \
-                    .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
-                    .replace(":", "").replace("*", "").replace("?", "") \
-                    .replace("?", "").replace('"', "").replace("<", "") \
-                    .replace(">", "").replace("|", "").replace(" ", "").replace("#表情", "").replace("#符号", "")
-
-                # 用户名
-                user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
-                    .replace("/", "").replace("快手", "").replace(" ", "") \
-                    .replace(" ", "").replace("&NBSP", "").replace("\r", "")
-
-                # 视频 ID
-                video_id = feeds[i]["vid"]
-
-                # 播放量
-                video_play_cnt = feeds[i]["play_pv"]
-
-                # 评论数
-                video_comment_cnt = feeds[i]["comment_count"]
-                # 点赞
-                video_like_cnt = feeds[i]["favor"]["total"]
-                # 分享
-                video_share_cnt = feeds[i]["share"]
-                # 时长
-                video_duration = int(feeds[i]["du"] / 1000)
-                # 发布时间
-                video_send_time = feeds[i]["t"]
-
-                # 宽和高
-                video_width = feeds[i]["w"]
-                video_height = feeds[i]["h"]
-                # 头像
-                head_url = feeds[i]["user"]["hurl"]
-                # 用户 ID
-                profile_id = feeds[i]["id"]
-                # 用户 mid
-                profile_mid = feeds[i]["user"]["mid"]
-                # 封面
-                cover_url = feeds[i]["url"]
-                # 视频播放地址
-                video_url = feeds[i]["v_url"]
-
-                Common.logger(log_type).info("标题:{}", video_title)
-                Common.logger(log_type).info("视频ID:{}", video_id)
-                Common.logger(log_type).info("用户名:{}", user_name)
-                Common.logger(log_type).info("播放量:{}", video_play_cnt)
-                Common.logger(log_type).info(
-                    "发布时间:{}", time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
-                Common.logger(log_type).info("播放地址:{}", video_url)
-
-                # 过滤无效视频
-                if video_id == "" or video_url == "" or video_send_time == "":
-                    Common.logger(log_type).info("无效视频")
-                elif int(video_send_time) < endtime:
-                    Common.logger(log_type).info("发布时间超过 48 小时")
-                elif cls.download_rule(
-                        video_duration, video_width, video_height, video_play_cnt,
-                        video_like_cnt, video_share_cnt, video_send_time) is False:
-                    Common.logger(log_type).info("不满足基础门槛规则")
-                # 过滤敏感词
-                elif any(word if word in video_title else False for word in cls.sensitive_words(log_type)) is True:
-                    Common.logger(log_type).info("视频已中敏感词:{}".format(video_title))
-                # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=yatRv2
-                elif video_id in [j for i in Feishu.get_values_batch(log_type, "xiaoniangao", "yatRv2") for j in i]:
-                    Common.logger(log_type).info("该视频已下载:{}", video_title)
-                # 满足抓取规则
-                else:
-                    Common.logger(log_type).info("开始下载视频:{}", video_title)
-                    # 下载封面
-                    Common.download_method(
-                        log_type=log_type, text="cover", d_name=video_title, d_url=cover_url)
-                    # 下载视频
-                    Common.download_method(
-                        log_type=log_type, text="video", d_name=video_title, d_url=video_url)
-                    # 保存视频信息至 "./videos/{download_video_title}/info.txt"
-                    with open(r"./videos/" + video_title
-                              + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
-                        f_a.write(str(video_id) + "\n" +
-                                  str(video_title) + "\n" +
-                                  str(video_duration) + "\n" +
-                                  str(video_play_cnt) + "\n" +
-                                  str(video_comment_cnt) + "\n" +
-                                  str(video_like_cnt) + "\n" +
-                                  str(video_share_cnt) + "\n" +
-                                  str(video_width) + "*" + str(video_height) + "\n" +
-                                  str(video_send_time) + "\n" +
-                                  str(user_name) + "\n" +
-                                  str(head_url) + "\n" +
-                                  str(video_url) + "\n" +
-                                  str(cover_url) + "\n" +
-                                  str("xiaoniangao"))
-                    Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
-
-                    # 上传视频
-                    Common.logger(log_type).info("开始上传视频:{}".format(video_title))
-                    our_video_id = Publish.upload_and_publish(log_type, "prod", "play")
-                    # Common.logger(log_type).info("our_video_id:{}", our_video_id)
-                    our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
-                    # Common.logger(log_type).info("our_video_link:{}", our_video_link)
-                    Common.logger(log_type).info("视频上传完成:{}", video_title)
-                    # 上传完成时间
-                    upload_time = int(time.time())
-
-                    # 保存视频信息到云文档
-                    Common.logger(log_type).info("添加视频到云文档:{}", video_title)
-                    # 插入空行
-                    time.sleep(1)
-                    Feishu.insert_columns(log_type, "xiaoniangao", "yatRv2", "ROWS", 1, 2)
-                    # 视频信息写入云文档
-                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(upload_time))),
-                               "定向账号爬取",
-                               video_id,
-                               video_title,
-                               our_video_link,
-                               video_play_cnt,
-                               video_comment_cnt,
-                               video_like_cnt,
-                               video_share_cnt,
-                               video_duration,
-                               str(video_width) + "*" + str(video_height),
-                               time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
-                               user_name,
-                               profile_id,
-                               profile_mid,
-                               head_url,
-                               cover_url,
-                               video_url]]
-                    time.sleep(1)
-                    Feishu.update_values(log_type, "xiaoniangao", "yatRv2", "F2:W2", values)
-
-                    # 保存视频信息到监控表
-                    Common.logger(log_type).info("添加视频到监控表:{}", video_title)
-                    # 插入空行
-                    time.sleep(1)
-                    Feishu.insert_columns(log_type, "monitor", "N7e2yI", "ROWS", 1, 2)
-                    # 视频信息写入监控表
-                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(upload_time))),
-                               video_id,
-                               video_title,
-                               our_video_link,
-                               profile_id,
-                               profile_mid,
-                               user_name,
-                               video_duration,
-                               time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
-                               video_play_cnt]]
-                    time.sleep(1)
-                    Feishu.update_values(log_type, "monitor", "N7e2yI", "F2:O2", values)
-                return int(video_send_time)
-        except Exception as e:
-            Common.logger(log_type).error("下载/上传视频异常:{}", e)
-
-
-if __name__ == "__main__":
-    follow = Follow()
-    # follow.follow_list()
-    # follow.download_follow_videos()
-    # follow.sub_users()
-    # print(follow.unfollow_list())
-    # follow.unsub_users()
-    print(follow.follow_uid)
-    print(follow.follow_token)
-    print(follow.follow_referer)
-    print(follow.follow_x_token_id)
-    print(follow.follow_x_b3_traceid)