пре 2 година · 1c3f111d12
--- a/kanyikan.sh
+++ b/kanyikan.sh
@@ -39,6 +39,9 @@ time=$(date +%H:%M:%S)
 
				 #echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量..." >> ${log_path}
			
 
				 #cd ~ && source ${profile_path}
			
 
				 #echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量完成！" >> ${log_path}
			
 
				+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在更新代码" >> ${log_path}
			
 
				+cd /Users/lieyunye/Desktop/crawler/crawler_kanyikan/ && git pull origin master --force
			
 
				+echo "$(date "+%Y-%m-%d %H:%M:%S") 代码更新完成"
			
 
				 
			
 
				 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在检测看一看推荐爬虫服务状态" >> ${log_path}
			
 
				 ps -ef | grep "run_kanyikan_recommend" | grep -v "grep"
			
--- a/main/kanyikan_moment.py
+++ b/main/kanyikan_moment.py
@@ -10,7 +10,7 @@ import urllib3
 
				 sys.path.append(os.getcwd())
			
 
				 from main.feishu_lib import Feishu
			
 
				 from main.common import Common
			
 
				-from main.kanyikan_moment_publish import Publish
			
 
				+from main.publish import Publish
			
 
				 proxies = {"http": None, "https": None}
			
 
				 
			
 
				 
			
@@ -364,7 +364,12 @@ class Moment:
 
				 
			
 
				                     # 上传视频
			
 
				                     Common.logger("moment").info("开始上传视频:{}".format(download_video_title))
			
 
				-                    our_video_id = Publish.upload_and_publish("moment", env, "play")
			
 
				+                    our_video_id = Publish.upload_and_publish(log_type="moment",
			
 
				+                                                              crawler="kanyikan",
			
 
				+                                                              strategy="朋友圈抓取策略",
			
 
				+                                                              our_uid="moment",
			
 
				+                                                              env=env,
			
 
				+                                                              oss_endpoint="out")
			
 
				                     our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
			
 
				                     Common.logger("moment").info("视频上传完成:{}", download_video_title)
			
 
				 
			
--- a/main/kanyikan_moment_publish.py
+++ b/main/kanyikan_moment_publish.py
@@ -1,259 +0,0 @@
 
				-# -*- coding: utf-8 -*-
			
 
				-# @Author: wangkun
			
 
				-# @Time: 2022/6/10
			
 
				-"""
			
 
				-上传视频到阿里云 OSS
			
 
				-上传视频到管理后台
			
 
				-"""
			
 
				-import json
			
 
				-import os
			
 
				-import random
			
 
				-import shutil
			
 
				-import time
			
 
				-
			
 
				-import oss2
			
 
				-import requests
			
 
				-import urllib3
			
 
				-from main.common import Common
			
 
				-
			
 
				-proxies = {"http": None, "https": None}
			
 
				-
			
 
				-
			
 
				-class Publish:
			
 
				-    @classmethod
			
 
				-    def publish_video_dev(cls, log_type, request_data):
			
 
				-        """
			
 
				-        loginUid  站内uid (随机)
			
 
				-        appType  默认：888888
			
 
				-        crawlerSrcId   站外视频ID
			
 
				-        crawlerSrcCode   渠道（自定义 KYK）
			
 
				-        crawlerSrcPublishTimestamp  视频原发布时间
			
 
				-        crawlerTaskTimestamp   爬虫创建时间（可以是当前时间）
			
 
				-        videoPath  视频oss地址
			
 
				-        coverImgPath  视频封面oss地址
			
 
				-        title  标题
			
 
				-        totalTime  视频时长
			
 
				-        viewStatus  视频的有效状态 默认1
			
 
				-        versionCode  版本 默认1
			
 
				-        :return:
			
 
				-        """
			
 
				-        # Common.logger(log_type).info('publish request data: {}'.format(request_data))
			
 
				-        result = cls.request_post('https://videotest.yishihui.com/longvideoapi/crawler/video/send', request_data)
			
 
				-        # Common.logger(log_type).info('publish result: {}'.format(result))
			
 
				-        video_id = result["data"]["id"]
			
 
				-        if result['code'] != 0:
			
 
				-            Common.logger(log_type).error('pushlish failure msg = {}'.format(result['msg']))
			
 
				-        else:
			
 
				-            Common.logger(log_type).info('publish success video_id = : {}'.format(request_data['crawlerSrcId']))
			
 
				-        return video_id
			
 
				-
			
 
				-    @classmethod
			
 
				-    def publish_video_prod(cls, log_type, request_data):
			
 
				-        """
			
 
				-        loginUid  站内uid (随机)
			
 
				-        appType  默认：888888
			
 
				-        crawlerSrcId   站外视频ID
			
 
				-        crawlerSrcCode   渠道（自定义 KYK）
			
 
				-        crawlerSrcPublishTimestamp  视频原发布时间
			
 
				-        crawlerTaskTimestamp   爬虫创建时间（可以是当前时间）
			
 
				-        videoPath  视频oss地址
			
 
				-        coverImgPath  视频封面oss地址
			
 
				-        title  标题
			
 
				-        totalTime  视频时长
			
 
				-        viewStatus  视频的有效状态 默认1
			
 
				-        versionCode  版本 默认1
			
 
				-        :return:
			
 
				-        """
			
 
				-        result = cls.request_post('https://longvideoapi.piaoquantv.com/longvideoapi/crawler/video/send', request_data)
			
 
				-        # Common.logger(log_type).info('publish result: {}'.format(result))
			
 
				-        video_id = result["data"]["id"]
			
 
				-        if result['code'] != 0:
			
 
				-            Common.logger(log_type).error('pushlish failure msg = {}'.format(result['msg']))
			
 
				-        else:
			
 
				-            Common.logger(log_type).info('publish success video_id = : {}'.format(request_data['crawlerSrcId']))
			
 
				-        return video_id
			
 
				-
			
 
				-    @classmethod
			
 
				-    def request_post(cls, request_url, request_data):
			
 
				-        """
			
 
				-        post 请求 HTTP接口
			
 
				-        :param request_url: 接口URL
			
 
				-        :param request_data: 请求参数
			
 
				-        :return: res_data json格式
			
 
				-        """
			
 
				-        urllib3.disable_warnings()
			
 
				-        response = requests.post(url=request_url, data=request_data, proxies=proxies, verify=False)
			
 
				-        if response.status_code == 200:
			
 
				-            res_data = json.loads(response.text)
			
 
				-            return res_data
			
 
				-
			
 
				-    # 以下代码展示了基本的文件上传、下载、罗列、删除用法。
			
 
				-
			
 
				-    # 首先初始化AccessKeyId、AccessKeySecret、Endpoint等信息。
			
 
				-    # 通过环境变量获取，或者把诸如“<你的AccessKeyId>”替换成真实的AccessKeyId等。
			
 
				-    #
			
 
				-    # 以杭州区域为例，Endpoint可以是：
			
 
				-    #   http://oss-cn-hangzhou.aliyuncs.com
			
 
				-    #   https://oss-cn-hangzhou.aliyuncs.com
			
 
				-    # 分别以HTTP、HTTPS协议访问。
			
 
				-    access_key_id = os.getenv('OSS_TEST_ACCESS_KEY_ID', 'LTAIP6x1l3DXfSxm')
			
 
				-    access_key_secret = os.getenv('OSS_TEST_ACCESS_KEY_SECRET', 'KbTaM9ars4OX3PMS6Xm7rtxGr1FLon')
			
 
				-    bucket_name = os.getenv('OSS_TEST_BUCKET', 'art-pubbucket')
			
 
				-    # endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou-internal.aliyuncs.com')
			
 
				-    endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou.aliyuncs.com')
			
 
				-
			
 
				-    # 确认上面的参数都填写正确了
			
 
				-    for param in (access_key_id, access_key_secret, bucket_name, endpoint):
			
 
				-        assert '<' not in param, '请设置参数：' + param
			
 
				-
			
 
				-    # 创建Bucket对象，所有Object相关的接口都可以通过Bucket对象来进行
			
 
				-    bucket = oss2.Bucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name)
			
 
				-
			
 
				-    """
			
 
				-    处理流程：
			
 
				-    1. 定时（每天凌晨1点执行一次）循环files文件下的内容 结构：files -> 视频文件夹 -> 视频文件 + 封面图 + 基本信息
			
 
				-    2. 视频文件和封面上传到oss
			
 
				-    - 视频文件oss目录  longvideo/crawler_local/video/prod/文件名
			
 
				-    - 视频封面oss目录  longvideo/crawler_local/image/prod/文件名
			
 
				-    3. 发布视频
			
 
				-    - 读取 基本信息 调用发布接口
			
 
				-    """
			
 
				-    # env 日期20220225 文件名
			
 
				-    oss_file_path_video = r'longvideo/crawler_local/video/{}/{}/{}'
			
 
				-    oss_file_path_image = r'longvideo/crawler_local/image/{}/{}/{}'
			
 
				-
			
 
				-    @classmethod
			
 
				-    def put_file(cls, log_type, oss_file, local_file):
			
 
				-        cls.bucket.put_object_from_file(oss_file, local_file)
			
 
				-        Common.logger(log_type).info("put oss file = {}, local file = {} success".format(oss_file, local_file))
			
 
				-
			
 
				-    # 清除本地文件
			
 
				-    @classmethod
			
 
				-    def remove_local_file(cls, log_type, local_file):
			
 
				-        os.remove(local_file)
			
 
				-        Common.logger(log_type).info("remove local file = {} success".format(local_file))
			
 
				-
			
 
				-    # 清除本地文件夹
			
 
				-    @classmethod
			
 
				-    def remove_local_file_dir(cls, log_type, local_file):
			
 
				-        os.rmdir(local_file)
			
 
				-        Common.logger(log_type).info("remove local file dir = {} success".format(local_file))
			
 
				-
			
 
				-    local_file_path = './videos'
			
 
				-    video_file = 'video'
			
 
				-    image_file = 'image'
			
 
				-    info_file = 'info'
			
 
				-    uids_dev_up = [6267140]
			
 
				-    uids_dev_play = [6267141]
			
 
				-    uids_prod_up = [20631208, 20631209, 20631210, 20631211, 20631212,
			
 
				-                    20631213, 20631214, 20631215, 20631216, 20631217]
			
 
				-    uids_prod_play = [20631208, 20631209, 20631210, 20631211, 20631212,
			
 
				-                      20631213, 20631214, 20631215, 20631216, 20631217,
			
 
				-                      20631223, 20631224, 20631225, 20631226, 20631227]
			
 
				-
			
 
				-    @classmethod
			
 
				-    def upload_and_publish(cls, log_type, env, job):
			
 
				-        """
			
 
				-        上传视频到 oss
			
 
				-        :param log_type: 哪个日志
			
 
				-        :param env: 测试环境：dev，正式环境：prod
			
 
				-        :param job: 上升榜：up，播放量：play
			
 
				-        """
			
 
				-        Common.logger(log_type).info("upload_and_publish starting...")
			
 
				-        today = time.strftime("%Y%m%d", time.localtime())
			
 
				-        # videos 目录下的所有视频文件夹
			
 
				-        files = os.listdir(cls.local_file_path)
			
 
				-        for f in files:
			
 
				-            try:
			
 
				-                # 单个视频文件夹
			
 
				-                fi_d = os.path.join(cls.local_file_path, f)
			
 
				-                # 确认为视频文件夹
			
 
				-                if os.path.isdir(fi_d):
			
 
				-                    Common.logger(log_type).info('dir = {}'.format(fi_d))
			
 
				-                    # 列出所有视频文件夹
			
 
				-                    dir_files = os.listdir(fi_d)
			
 
				-                    data = {'appType': '888888', 'crawlerSrcCode': 'KANYIKAN', 'viewStatus': '1', 'versionCode': '1'}
			
 
				-                    now_timestamp = int(round(time.time() * 1000))
			
 
				-                    data['crawlerTaskTimestamp'] = str(now_timestamp)
			
 
				-                    global uid
			
 
				-                    if env == "dev" and job == "up":
			
 
				-                        uid = str(random.choice(cls.uids_dev_up))
			
 
				-                    elif env == "dev" and job == "play":
			
 
				-                        uid = str(random.choice(cls.uids_dev_play))
			
 
				-                    elif env == "prod" and job == "up":
			
 
				-                        uid = str(random.choice(cls.uids_prod_up))
			
 
				-                    elif env == "prod" and job == "play":
			
 
				-                        uid = str(random.choice(cls.uids_prod_play))
			
 
				-                    data['loginUid'] = uid
			
 
				-                    # 单个视频文件夹下的所有视频文件
			
 
				-                    for fi in dir_files:
			
 
				-                        # 视频文件夹下的所有文件路径
			
 
				-                        fi_path = fi_d + '/' + fi
			
 
				-                        Common.logger(log_type).info('dir fi_path = {}'.format(fi_path))
			
 
				-                        # 读取 info.txt，赋值给 data
			
 
				-                        if cls.info_file in fi:
			
 
				-                            f = open(fi_path, "r", encoding="UTF-8")
			
 
				-                            # 读取数据 数据准确性写入的时候保证 读取暂不处理
			
 
				-                            for i in range(14):
			
 
				-                                line = f.readline()
			
 
				-                                line = line.replace('\n', '')
			
 
				-                                if line is not None and len(line) != 0 and not line.isspace():
			
 
				-                                    Common.logger(log_type).info("line = {}".format(line))
			
 
				-                                    if i == 0:
			
 
				-                                        data['crawlerSrcId'] = line
			
 
				-                                    elif i == 1:
			
 
				-                                        data['title'] = line
			
 
				-                                    elif i == 2:
			
 
				-                                        data['totalTime'] = line
			
 
				-                                    elif i == 8:
			
 
				-                                        data['crawlerSrcPublishTimestamp'] = line
			
 
				-                                else:
			
 
				-                                    Common.logger(log_type).warning("{} line is None".format(fi_path))
			
 
				-                            f.close()
			
 
				-                            # remove info.txt
			
 
				-                            cls.remove_local_file(log_type, fi_path)
			
 
				-                    # 刷新数据
			
 
				-                    dir_files = os.listdir(fi_d)
			
 
				-                    for fi in dir_files:
			
 
				-                        fi_path = fi_d + '/' + fi
			
 
				-                        Common.logger(log_type).info('dir fi_path = {}'.format(fi_path))
			
 
				-                        # 上传oss
			
 
				-                        if cls.video_file in fi:
			
 
				-                            global oss_video_file
			
 
				-                            if env == "dev":
			
 
				-                                oss_video_file = cls.oss_file_path_video.format("dev", today, data['crawlerSrcId'])
			
 
				-                            elif env == "prod":
			
 
				-                                oss_video_file = cls.oss_file_path_video.format("prod", today, data['crawlerSrcId'])
			
 
				-                            Common.logger(log_type).info("oss_video_file = {}".format(oss_video_file))
			
 
				-                            cls.put_file(log_type, oss_video_file, fi_path)
			
 
				-                            data['videoPath'] = oss_video_file
			
 
				-                            Common.logger(log_type).info("videoPath = {}".format(oss_video_file))
			
 
				-                        elif cls.image_file in fi:
			
 
				-                            global oss_image_file
			
 
				-                            if env == "dev":
			
 
				-                                oss_image_file = cls.oss_file_path_image.format("env", today, data['crawlerSrcId'])
			
 
				-                            elif env == "prod":
			
 
				-                                oss_image_file = cls.oss_file_path_image.format("prod", today, data['crawlerSrcId'])
			
 
				-                            Common.logger(log_type).info("oss_image_file = {}".format(oss_image_file))
			
 
				-                            cls.put_file(log_type, oss_image_file, fi_path)
			
 
				-                            data['coverImgPath'] = oss_image_file
			
 
				-                            Common.logger(log_type).info("coverImgPath = {}".format(oss_image_file))
			
 
				-                        # 全部remove
			
 
				-                        cls.remove_local_file(log_type, fi_path)
			
 
				-
			
 
				-                    # 发布
			
 
				-                    if env == "dev":
			
 
				-                        video_id = cls.publish_video_dev(log_type, data)
			
 
				-                    elif env == "prod":
			
 
				-                        video_id = cls.publish_video_prod(log_type, data)
			
 
				-                    else:
			
 
				-                        video_id = cls.publish_video_dev(log_type, data)
			
 
				-                    cls.remove_local_file_dir(log_type, fi_d)
			
 
				-                    return video_id
			
 
				-
			
 
				-                else:
			
 
				-                    Common.logger(log_type).error('file not a dir = {}'.format(fi_d))
			
 
				-            except Exception as e:
			
 
				-                # 删除视频文件夹
			
 
				-                shutil.rmtree("./videos/" + f + "/")
			
 
				-                Common.logger(log_type).exception('upload_and_publish error', e)
			
--- a/main/kanyikan_recommend.py
+++ b/main/kanyikan_recommend.py
@@ -50,115 +50,115 @@ class Kanyikanrecommend:
 
				         while True:
			
 
				             for page in range(1, 101):
			
 
				                 Common.logger(log_type).info(f"正在抓取第{page}页")
			
 
				-                # try:
			
 
				-                session = Common.get_session(log_type)
			
 
				-                if session is None:
			
 
				-                    time.sleep(1)
			
 
				-                    continue
			
 
				-                url = 'https://search.weixin.qq.com/cgi-bin/recwxa/recwxavideolist?'
			
 
				-                header = {
			
 
				-                    "Connection": "keep-alive",
			
 
				-                    "content-type": "application/json",
			
 
				-                    "Accept-Encoding": "gzip,compress,br,deflate",
			
 
				-                    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) "
			
 
				-                                  "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.18(0x18001236) "
			
 
				-                                  "NetType/WIFI Language/zh_CN",
			
 
				-                    "Referer": "https://servicewechat.com/wxbb9a805eb4f9533c/234/page-frame.html",
			
 
				-                }
			
 
				-                params = {
			
 
				-                    'session': session,
			
 
				-                    "offset": 0,
			
 
				-                    "wxaVersion": "3.9.2",
			
 
				-                    "count": "10",
			
 
				-                    "channelid": "208",
			
 
				-                    "scene": '310',
			
 
				-                    "subscene": '1089',
			
 
				-                    "clientVersion": '8.0.18',
			
 
				-                    "sharesearchid": '0',
			
 
				-                    "nettype": 'wifi',
			
 
				-                    "switchprofile": "0",
			
 
				-                    "switchnewuser": "0",
			
 
				-                }
			
 
				-                urllib3.disable_warnings()
			
 
				-                response = requests.get(url=url, headers=header, params=params, proxies=proxies, verify=False)
			
 
				-                if "data" not in response.text:
			
 
				-                    Common.logger(log_type).info("获取视频list时，session过期，随机睡眠 31-50 秒")
			
 
				-                    # 如果返回空信息，则随机睡眠 31-40 秒
			
 
				-                    time.sleep(random.randint(31, 40))
			
 
				-                    continue
			
 
				-                elif "items" not in response.json()["data"]:
			
 
				-                    Common.logger(log_type).info(f"get_feeds:{response.json()}，随机睡眠 1-3 分钟")
			
 
				-                    # 如果返回空信息，则随机睡眠 1-3 分钟
			
 
				-                    time.sleep(random.randint(60, 180))
			
 
				-                    continue
			
 
				-                feeds = response.json().get("data", {}).get("items", "")
			
 
				-                if feeds == "":
			
 
				-                    Common.logger(log_type).info(f"feeds:{feeds}")
			
 
				-                    time.sleep(random.randint(31, 40))
			
 
				-                    continue
			
 
				-                for i in range(len(feeds)):
			
 
				-                    # try:
			
 
				-                    video_title = feeds[i].get("title", "").strip().replace("\n", "") \
			
 
				-                        .replace("/", "").replace("\\", "").replace("\r", "") \
			
 
				-                        .replace(":", "").replace("*", "").replace("？", "") \
			
 
				-                        .replace("?", "").replace('"', "").replace("<", "") \
			
 
				-                        .replace(">", "").replace("|", "").replace(" ", "") \
			
 
				-                        .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
			
 
				-                        .replace("'", "").replace("#", "").replace("Merge", "")
			
 
				-                    publish_time_stamp = feeds[i].get("date", 0)
			
 
				-                    publish_time_str = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(publish_time_stamp))
			
 
				-                    # 获取播放地址
			
 
				-                    if "videoInfo" not in feeds[i]:
			
 
				-                        video_url = ""
			
 
				-                    elif "mpInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
			
 
				-                        if len(feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"]) > 2:
			
 
				-                            video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][2]["url"]
			
 
				-                        else:
			
 
				-                            video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][0]["url"]
			
 
				-                    elif "ctnInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
			
 
				-                        video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"]
			
 
				-                    else:
			
 
				-                        video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"]
			
 
				-                    video_dict = {
			
 
				-                        "video_title": video_title,
			
 
				-                        "video_id":  feeds[i].get("videoId", ""),
			
 
				-                        "play_cnt":  feeds[i].get("playCount", 0),
			
 
				-                        "like_cnt":  feeds[i].get("liked_cnt", 0),
			
 
				-                        "comment_cnt":  feeds[i].get("comment_cnt", 0),
			
 
				-                        "share_cnt":  feeds[i].get("shared_cnt", 0),
			
 
				-                        "duration":  feeds[i].get("mediaDuration", 0),
			
 
				-                        "video_width":  feeds[i].get("short_video_info", {}).get("width", 0),
			
 
				-                        "video_height":  feeds[i].get("short_video_info", {}).get("height", 0),
			
 
				-                        "publish_time_stamp":  publish_time_stamp,
			
 
				-                        "publish_time_str":  publish_time_str,
			
 
				-                        "user_name": feeds[i].get("source", "").strip().replace("\n", ""),
			
 
				-                        "user_id": feeds[i].get("openid", ""),
			
 
				-                        "avatar_url": feeds[i].get("bizIcon", ""),
			
 
				-                        "cover_url": feeds[i].get("thumbUrl", ""),
			
 
				-                        "video_url": video_url,
			
 
				-                        "session": session,
			
 
				+                try:
			
 
				+                    session = Common.get_session(log_type)
			
 
				+                    if session is None:
			
 
				+                        time.sleep(1)
			
 
				+                        continue
			
 
				+                    url = 'https://search.weixin.qq.com/cgi-bin/recwxa/recwxavideolist?'
			
 
				+                    header = {
			
 
				+                        "Connection": "keep-alive",
			
 
				+                        "content-type": "application/json",
			
 
				+                        "Accept-Encoding": "gzip,compress,br,deflate",
			
 
				+                        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) "
			
 
				+                                      "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.18(0x18001236) "
			
 
				+                                      "NetType/WIFI Language/zh_CN",
			
 
				+                        "Referer": "https://servicewechat.com/wxbb9a805eb4f9533c/234/page-frame.html",
			
 
				+                    }
			
 
				+                    params = {
			
 
				+                        'session': session,
			
 
				+                        "offset": 0,
			
 
				+                        "wxaVersion": "3.9.2",
			
 
				+                        "count": "10",
			
 
				+                        "channelid": "208",
			
 
				+                        "scene": '310',
			
 
				+                        "subscene": '1089',
			
 
				+                        "clientVersion": '8.0.18',
			
 
				+                        "sharesearchid": '0',
			
 
				+                        "nettype": 'wifi',
			
 
				+                        "switchprofile": "0",
			
 
				+                        "switchnewuser": "0",
			
 
				                     }
			
 
				-                    for k, v in video_dict.items():
			
 
				-                        Common.logger(log_type).info(f"{k}:{v}")
			
 
				+                    urllib3.disable_warnings()
			
 
				+                    response = requests.get(url=url, headers=header, params=params, proxies=proxies, verify=False)
			
 
				+                    if "data" not in response.text:
			
 
				+                        Common.logger(log_type).info("获取视频list时，session过期，随机睡眠 31-50 秒")
			
 
				+                        # 如果返回空信息，则随机睡眠 31-40 秒
			
 
				+                        time.sleep(random.randint(31, 40))
			
 
				+                        continue
			
 
				+                    elif "items" not in response.json()["data"]:
			
 
				+                        Common.logger(log_type).info(f"get_feeds:{response.json()}，随机睡眠 1-3 分钟")
			
 
				+                        # 如果返回空信息，则随机睡眠 1-3 分钟
			
 
				+                        time.sleep(random.randint(60, 180))
			
 
				+                        continue
			
 
				+                    feeds = response.json().get("data", {}).get("items", "")
			
 
				+                    if feeds == "":
			
 
				+                        Common.logger(log_type).info(f"feeds:{feeds}")
			
 
				+                        time.sleep(random.randint(31, 40))
			
 
				+                        continue
			
 
				+                    for i in range(len(feeds)):
			
 
				+                        try:
			
 
				+                            video_title = feeds[i].get("title", "").strip().replace("\n", "") \
			
 
				+                                .replace("/", "").replace("\\", "").replace("\r", "") \
			
 
				+                                .replace(":", "").replace("*", "").replace("？", "") \
			
 
				+                                .replace("?", "").replace('"', "").replace("<", "") \
			
 
				+                                .replace(">", "").replace("|", "").replace(" ", "") \
			
 
				+                                .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
			
 
				+                                .replace("'", "").replace("#", "").replace("Merge", "")
			
 
				+                            publish_time_stamp = feeds[i].get("date", 0)
			
 
				+                            publish_time_str = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(publish_time_stamp))
			
 
				+                            # 获取播放地址
			
 
				+                            if "videoInfo" not in feeds[i]:
			
 
				+                                video_url = ""
			
 
				+                            elif "mpInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
			
 
				+                                if len(feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"]) > 2:
			
 
				+                                    video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][2]["url"]
			
 
				+                                else:
			
 
				+                                    video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][0]["url"]
			
 
				+                            elif "ctnInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
			
 
				+                                video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"]
			
 
				+                            else:
			
 
				+                                video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"]
			
 
				+                            video_dict = {
			
 
				+                                "video_title": video_title,
			
 
				+                                "video_id":  feeds[i].get("videoId", ""),
			
 
				+                                "play_cnt":  feeds[i].get("playCount", 0),
			
 
				+                                "like_cnt":  feeds[i].get("liked_cnt", 0),
			
 
				+                                "comment_cnt":  feeds[i].get("comment_cnt", 0),
			
 
				+                                "share_cnt":  feeds[i].get("shared_cnt", 0),
			
 
				+                                "duration":  feeds[i].get("mediaDuration", 0),
			
 
				+                                "video_width":  feeds[i].get("short_video_info", {}).get("width", 0),
			
 
				+                                "video_height":  feeds[i].get("short_video_info", {}).get("height", 0),
			
 
				+                                "publish_time_stamp":  publish_time_stamp,
			
 
				+                                "publish_time_str":  publish_time_str,
			
 
				+                                "user_name": feeds[i].get("source", "").strip().replace("\n", ""),
			
 
				+                                "user_id": feeds[i].get("openid", ""),
			
 
				+                                "avatar_url": feeds[i].get("bizIcon", ""),
			
 
				+                                "cover_url": feeds[i].get("thumbUrl", ""),
			
 
				+                                "video_url": video_url,
			
 
				+                                "session": session,
			
 
				+                            }
			
 
				+                            for k, v in video_dict.items():
			
 
				+                                Common.logger(log_type).info(f"{k}:{v}")
			
 
				 
			
 
				-                    if video_dict["video_id"] == "" \
			
 
				-                            or video_dict["video_title"] == ""\
			
 
				-                            or video_dict["video_url"] == "":
			
 
				-                        Common.logger(log_type).info("无效视频\n")
			
 
				-                    elif cls.download_rule(video_dict) is False:
			
 
				-                        Common.logger(log_type).info("不满足抓取规则\n")
			
 
				-                    elif any(str(word) if str(word) in video_title else False for word in cls.get_filter_word(log_type, crawler)) is True:
			
 
				-                        Common.logger(log_type).info("视频已中过滤词\n")
			
 
				-                    elif video_dict["video_id"] in [j for i in Feishu.get_values_batch(log_type, crawler, "ho98Ov") for j in i]:
			
 
				-                        Common.logger(log_type).info("视频已下载\n")
			
 
				-                    elif video_dict["video_id"] in [j for i in Feishu.get_values_batch(log_type, crawler, "20ce0c") for j in i]:
			
 
				-                        Common.logger(log_type).info("视频已下载\n")
			
 
				-                    else:
			
 
				-                        cls.download_publish(log_type, crawler, video_dict, env)
			
 
				-                #         except Exception as e:
			
 
				-                #             Common.logger(log_type).error(f"抓取单条视频异常:{e}\n")
			
 
				-                # except Exception as e:
			
 
				-                #     Common.logger(log_type).error(f"抓取第{page}页时异常:{e}\n")
			
 
				+                            if video_dict["video_id"] == "" \
			
 
				+                                    or video_dict["video_title"] == ""\
			
 
				+                                    or video_dict["video_url"] == "":
			
 
				+                                Common.logger(log_type).info("无效视频\n")
			
 
				+                            elif cls.download_rule(video_dict) is False:
			
 
				+                                Common.logger(log_type).info("不满足抓取规则\n")
			
 
				+                            elif any(str(word) if str(word) in video_title else False for word in cls.get_filter_word(log_type, crawler)) is True:
			
 
				+                                Common.logger(log_type).info("视频已中过滤词\n")
			
 
				+                            elif video_dict["video_id"] in [j for i in Feishu.get_values_batch(log_type, crawler, "ho98Ov") for j in i]:
			
 
				+                                Common.logger(log_type).info("视频已下载\n")
			
 
				+                            elif video_dict["video_id"] in [j for i in Feishu.get_values_batch(log_type, crawler, "20ce0c") for j in i]:
			
 
				+                                Common.logger(log_type).info("视频已下载\n")
			
 
				+                            else:
			
 
				+                                cls.download_publish(log_type, crawler, video_dict, env)
			
 
				+                        except Exception as e:
			
 
				+                            Common.logger(log_type).error(f"抓取单条视频异常:{e}\n")
			
 
				+                except Exception as e:
			
 
				+                    Common.logger(log_type).error(f"抓取第{page}页时异常:{e}\n")
			
 
				 
			
 
				     @classmethod
			
 
				     def download_publish(cls, log_type, crawler, video_dict, env):
			
--- a/main/kanyikan_recommend_publish.py
+++ b/main/kanyikan_recommend_publish.py
@@ -1,262 +0,0 @@
 
				-# -*- coding: utf-8 -*-
			
 
				-# @Author: wangkun
			
 
				-# @Time: 2022/4/18
			
 
				-"""
			
 
				-上传视频到阿里云 OSS
			
 
				-上传视频到管理后台
			
 
				-"""
			
 
				-import json
			
 
				-import os
			
 
				-import random
			
 
				-import time
			
 
				-import oss2
			
 
				-import requests
			
 
				-import urllib3
			
 
				-from main.common import Common
			
 
				-proxies = {"http": None, "https": None}
			
 
				-
			
 
				-
			
 
				-class Publish:
			
 
				-    @classmethod
			
 
				-    def publish_video_dev(cls, log_type, request_data):
			
 
				-        """
			
 
				-        loginUid  站内uid (随机)
			
 
				-        appType  默认：888888
			
 
				-        crawlerSrcId   站外视频ID
			
 
				-        crawlerSrcCode   渠道（自定义 KYK）
			
 
				-        crawlerSrcPublishTimestamp  视频原发布时间
			
 
				-        crawlerTaskTimestamp   爬虫创建时间（可以是当前时间）
			
 
				-        videoPath  视频oss地址
			
 
				-        coverImgPath  视频封面oss地址
			
 
				-        title  标题
			
 
				-        totalTime  视频时长
			
 
				-        viewStatus  视频的有效状态 默认1
			
 
				-        versionCode  版本 默认1
			
 
				-        :return:
			
 
				-        """
			
 
				-        result = cls.request_post('https://videotest.yishihui.com/longvideoapi/crawler/video/send', request_data)
			
 
				-        # Common.logger(log_type).info('publish result: {}', result)
			
 
				-        video_id = result["data"]["id"]
			
 
				-        if result['code'] != 0:
			
 
				-            Common.logger(log_type).error('pushlish failure msg = {}', result['msg'])
			
 
				-        else:
			
 
				-            Common.logger(log_type).info('publish success video_id = : {}', request_data['crawlerSrcId'])
			
 
				-        return video_id
			
 
				-
			
 
				-    @classmethod
			
 
				-    def publish_video_prod(cls, log_type, request_data):
			
 
				-        """
			
 
				-        loginUid  站内uid (随机)
			
 
				-        appType  默认：888888
			
 
				-        crawlerSrcId   站外视频ID
			
 
				-        crawlerSrcCode   渠道（自定义 KYK）
			
 
				-        crawlerSrcPublishTimestamp  视频原发布时间
			
 
				-        crawlerTaskTimestamp   爬虫创建时间（可以是当前时间）
			
 
				-        videoPath  视频oss地址
			
 
				-        coverImgPath  视频封面oss地址
			
 
				-        title  标题
			
 
				-        totalTime  视频时长
			
 
				-        viewStatus  视频的有效状态 默认1
			
 
				-        versionCode  版本 默认1
			
 
				-        :return:
			
 
				-        """
			
 
				-        result = cls.request_post('https://longvideoapi.piaoquantv.com/longvideoapi/crawler/video/send', request_data)
			
 
				-        # Common.logger(log_type).info('publish result: {}', result)
			
 
				-        video_id = result["data"]["id"]
			
 
				-        if result['code'] != 0:
			
 
				-            Common.logger(log_type).error('pushlish failure msg = {}', result['msg'])
			
 
				-        else:
			
 
				-            Common.logger(log_type).info('publish success video_id = : {}', request_data['crawlerSrcId'])
			
 
				-        return video_id
			
 
				-
			
 
				-    @classmethod
			
 
				-    def request_post(cls, request_url, request_data):
			
 
				-        """
			
 
				-        post 请求 HTTP接口
			
 
				-        :param request_url: 接口URL
			
 
				-        :param request_data: 请求参数
			
 
				-        :return: res_data json格式
			
 
				-        """
			
 
				-        urllib3.disable_warnings()
			
 
				-        response = requests.post(url=request_url, data=request_data, proxies=proxies, verify=False)
			
 
				-        if response.status_code == 200:
			
 
				-            res_data = json.loads(response.text)
			
 
				-            return res_data
			
 
				-
			
 
				-    # 以下代码展示了基本的文件上传、下载、罗列、删除用法。
			
 
				-
			
 
				-    # 首先初始化AccessKeyId、AccessKeySecret、Endpoint等信息。
			
 
				-    # 通过环境变量获取，或者把诸如“<你的AccessKeyId>”替换成真实的AccessKeyId等。
			
 
				-    #
			
 
				-    # 以杭州区域为例，Endpoint可以是：
			
 
				-    #   http://oss-cn-hangzhou.aliyuncs.com
			
 
				-    #   https://oss-cn-hangzhou.aliyuncs.com
			
 
				-    # 分别以HTTP、HTTPS协议访问。
			
 
				-    access_key_id = os.getenv('OSS_TEST_ACCESS_KEY_ID', 'LTAIP6x1l3DXfSxm')
			
 
				-    access_key_secret = os.getenv('OSS_TEST_ACCESS_KEY_SECRET', 'KbTaM9ars4OX3PMS6Xm7rtxGr1FLon')
			
 
				-    bucket_name = os.getenv('OSS_TEST_BUCKET', 'art-pubbucket')
			
 
				-    # endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou-internal.aliyuncs.com')
			
 
				-    endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou.aliyuncs.com')
			
 
				-
			
 
				-    # 确认上面的参数都填写正确了
			
 
				-    for param in (access_key_id, access_key_secret, bucket_name, endpoint):
			
 
				-        assert '<' not in param, '请设置参数：' + param
			
 
				-
			
 
				-    # 创建Bucket对象，所有Object相关的接口都可以通过Bucket对象来进行
			
 
				-    bucket = oss2.Bucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name)
			
 
				-
			
 
				-    """
			
 
				-    处理流程：
			
 
				-    1. 定时（每天凌晨1点执行一次）循环files文件下的内容 结构：files -> 视频文件夹 -> 视频文件 + 封面图 + 基本信息
			
 
				-    2. 视频文件和封面上传到oss
			
 
				-    - 视频文件oss目录  longvideo/crawler_local/video/prod/文件名
			
 
				-    - 视频封面oss目录  longvideo/crawler_local/image/prod/文件名
			
 
				-    3. 发布视频
			
 
				-    - 读取 基本信息 调用发布接口
			
 
				-    """
			
 
				-    # env 日期20220225 文件名
			
 
				-    oss_file_path_video = r'longvideo/crawler_local/video/{}/{}/{}'
			
 
				-    oss_file_path_image = r'longvideo/crawler_local/image/{}/{}/{}'
			
 
				-
			
 
				-    @classmethod
			
 
				-    def put_file(cls, log_type, oss_file, local_file):
			
 
				-        cls.bucket.put_object_from_file(oss_file, local_file)
			
 
				-        Common.logger(log_type).info("put oss file = {}, local file = {} success", oss_file, local_file)
			
 
				-
			
 
				-    # 清除本地文件
			
 
				-    @classmethod
			
 
				-    def remove_local_file(cls, log_type, local_file):
			
 
				-        os.remove(local_file)
			
 
				-        Common.logger(log_type).info("remove local file = {} success", local_file)
			
 
				-
			
 
				-    # 清除本地文件夹
			
 
				-    @classmethod
			
 
				-    def remove_local_file_dir(cls, log_type, local_file):
			
 
				-        os.rmdir(local_file)
			
 
				-        Common.logger(log_type).info("remove local file dir = {} success", local_file)
			
 
				-
			
 
				-    local_file_path = './videos'
			
 
				-    video_file = 'video'
			
 
				-    image_file = 'image'
			
 
				-    info_file = 'info'
			
 
				-    uids_dev_up = [6267140]
			
 
				-    uids_dev_play = [6267141]
			
 
				-    uids_dev_recommend = [6267140, 6267141, 6267824]
			
 
				-    uids_prod_up = [20631208, 20631209, 20631210, 20631211, 20631212,
			
 
				-                    20631213, 20631214, 20631215, 20631216, 20631217]
			
 
				-    uids_prod_play = [20631208, 20631209, 20631210, 20631211, 20631212,
			
 
				-                      20631213, 20631214, 20631215, 20631216, 20631217,
			
 
				-                      20631223, 20631224, 20631225, 20631226, 20631227]
			
 
				-    uids_prod_recommend = [20631208, 20631209, 20631210, 20631211, 20631212,
			
 
				-                           20631213, 20631214, 20631215, 20631216, 20631217,
			
 
				-                           20631223, 20631224, 20631225, 20631226, 20631227]
			
 
				-
			
 
				-    @classmethod
			
 
				-    def upload_and_publish(cls, log_type, env, job):
			
 
				-        """
			
 
				-        上传视频到 oss
			
 
				-        :param log_type: 选择的 log
			
 
				-        :param env: 测试环境：dev，正式环境：prod
			
 
				-        :param job: 上升榜：up，播放量：play， send_time：发布时间榜
			
 
				-        """
			
 
				-        Common.logger(log_type).info("upload_and_publish starting...")
			
 
				-        today = time.strftime("%Y%m%d", time.localtime())
			
 
				-        # videos 目录下的所有视频文件夹
			
 
				-        files = os.listdir(cls.local_file_path)
			
 
				-        for f in files:
			
 
				-            try:
			
 
				-                # 单个视频文件夹
			
 
				-                fi_d = os.path.join(cls.local_file_path, f)
			
 
				-                # 确认为视频文件夹
			
 
				-                if os.path.isdir(fi_d):
			
 
				-                    Common.logger(log_type).info('dir = {}', fi_d)
			
 
				-                    # 列出所有视频文件夹
			
 
				-                    dir_files = os.listdir(fi_d)
			
 
				-                    data = {'appType': '888888', 'crawlerSrcCode': 'KANYIKAN', 'viewStatus': '1', 'versionCode': '1'}
			
 
				-                    now_timestamp = int(round(time.time() * 1000))
			
 
				-                    data['crawlerTaskTimestamp'] = str(now_timestamp)
			
 
				-                    global uid
			
 
				-                    if env == "dev" and job == "up":
			
 
				-                        uid = str(random.choice(cls.uids_dev_up))
			
 
				-                    elif env == "dev" and job == "play":
			
 
				-                        uid = str(random.choice(cls.uids_dev_play))
			
 
				-                    elif env == "dev" and job == "recommend":
			
 
				-                        uid = str(random.choice(cls.uids_dev_recommend))
			
 
				-                    elif env == "prod" and job == "up":
			
 
				-                        uid = str(random.choice(cls.uids_prod_up))
			
 
				-                    elif env == "prod" and job == "play":
			
 
				-                        uid = str(random.choice(cls.uids_prod_play))
			
 
				-                    elif env == "prod" and job == "recommend":
			
 
				-                        uid = str(random.choice(cls.uids_prod_recommend))
			
 
				-                    data['loginUid'] = uid
			
 
				-                    # 单个视频文件夹下的所有视频文件
			
 
				-                    for fi in dir_files:
			
 
				-                        # 视频文件夹下的所有文件路径
			
 
				-                        fi_path = fi_d + '/' + fi
			
 
				-                        Common.logger(log_type).info('dir fi_path = {}', fi_path)
			
 
				-                        # 读取 info.txt，赋值给 data
			
 
				-                        if cls.info_file in fi:
			
 
				-                            f = open(fi_path, "r", encoding="UTF-8")
			
 
				-                            # 读取数据 数据准确性写入的时候保证 读取暂不处理
			
 
				-                            for i in range(14):
			
 
				-                                line = f.readline()
			
 
				-                                line = line.replace('\n', '')
			
 
				-                                if line is not None and len(line) != 0 and not line.isspace():
			
 
				-                                    Common.logger(log_type).info("line = {}", line)
			
 
				-                                    if i == 0:
			
 
				-                                        data['crawlerSrcId'] = line
			
 
				-                                    elif i == 1:
			
 
				-                                        data['title'] = line
			
 
				-                                    elif i == 2:
			
 
				-                                        data['totalTime'] = line
			
 
				-                                    elif i == 8:
			
 
				-                                        data['crawlerSrcPublishTimestamp'] = line
			
 
				-                                else:
			
 
				-                                    Common.logger(log_type).warning("{} line is None", fi_path)
			
 
				-                            f.close()
			
 
				-                            # remove info.txt
			
 
				-                            cls.remove_local_file(log_type, fi_path)
			
 
				-                    # 刷新数据
			
 
				-                    dir_files = os.listdir(fi_d)
			
 
				-                    for fi in dir_files:
			
 
				-                        fi_path = fi_d + '/' + fi
			
 
				-                        Common.logger(log_type).info('dir fi_path = {}', fi_path)
			
 
				-                        # 上传oss
			
 
				-                        if cls.video_file in fi:
			
 
				-                            global oss_video_file
			
 
				-                            if env == "dev":
			
 
				-                                oss_video_file = cls.oss_file_path_video.format("dev", today, data['crawlerSrcId'])
			
 
				-                            elif env == "prod":
			
 
				-                                oss_video_file = cls.oss_file_path_video.format("prod", today, data['crawlerSrcId'])
			
 
				-                            Common.logger(log_type).info("oss_video_file = {}", oss_video_file)
			
 
				-                            cls.put_file(log_type, oss_video_file, fi_path)
			
 
				-                            data['videoPath'] = oss_video_file
			
 
				-                            Common.logger(log_type).info("videoPath = {}", oss_video_file)
			
 
				-                        elif cls.image_file in fi:
			
 
				-                            global oss_image_file
			
 
				-                            if env == "dev":
			
 
				-                                oss_image_file = cls.oss_file_path_image.format("env", today, data['crawlerSrcId'])
			
 
				-                            elif env == "prod":
			
 
				-                                oss_image_file = cls.oss_file_path_image.format("prod", today, data['crawlerSrcId'])
			
 
				-                            Common.logger(log_type).info("oss_image_file = {}", oss_image_file)
			
 
				-                            cls.put_file(log_type, oss_image_file, fi_path)
			
 
				-                            data['coverImgPath'] = oss_image_file
			
 
				-                            Common.logger(log_type).info("coverImgPath = {}", oss_image_file)
			
 
				-                        # 全部remove
			
 
				-                        cls.remove_local_file(log_type, fi_path)
			
 
				-
			
 
				-                    # 发布
			
 
				-                    if env == "dev":
			
 
				-                        video_id = cls.publish_video_dev(log_type, data)
			
 
				-                    elif env == "prod":
			
 
				-                        video_id = cls.publish_video_prod(log_type, data)
			
 
				-                    else:
			
 
				-                        video_id = cls.publish_video_dev(log_type, data)
			
 
				-                    cls.remove_local_file_dir(log_type, fi_d)
			
 
				-                    return video_id
			
 
				-
			
 
				-                else:
			
 
				-                    Common.logger(log_type).error('file not a dir = {}', fi_d)
			
 
				-            except Exception as e:
			
 
				-                # 删除视频文件夹
			
 
				-                Common.logger(log_type).exception('upload_and_publish error', e)