# -*- coding: utf-8 -*- # @Author: wangkun # @Time: 2023/1/31 """ 站内UID配置 / 环境配置 / 视频上传 """ import json import os import random import shutil import sys import time import oss2 import requests import urllib3 sys.path.append(os.getcwd()) from common.common import Common proxies = {"http": None, "https": None} class Publish: @classmethod def publish_video_dev(cls, log_type, crawler, request_data): """ loginUid 站内uid (随机) appType 默认:888888 crawlerSrcId 站外视频ID crawlerSrcCode 渠道(自定义 KYK) crawlerSrcPublishTimestamp 视频原发布时间 crawlerTaskTimestamp 爬虫创建时间(可以是当前时间) videoPath 视频oss地址 coverImgPath 视频封面oss地址 title 标题 totalTime 视频时长 viewStatus 视频的有效状态 默认1 versionCode 版本 默认1 :return: """ Common.logger(log_type, crawler).info('publish request data: {}'.format(request_data)) result = cls.request_post('https://videotest.yishihui.com/longvideoapi/crawler/video/send', request_data) Common.logger(log_type, crawler).info('publish result: {}'.format(result)) video_id = result["data"]["id"] Common.logger(log_type, crawler).info('video_id: {}'.format(video_id)) if result['code'] != 0: Common.logger(log_type, crawler).error('pushlish failure msg = {}'.format(result['msg'])) else: Common.logger(log_type, crawler).info( 'publish success video_id = : {}'.format(request_data['crawlerSrcId'])) return video_id @classmethod def publish_video_prod(cls, log_type, crawler, request_data): """ loginUid 站内uid (随机) appType 默认:888888 crawlerSrcId 站外视频ID crawlerSrcCode 渠道(自定义 KYK) crawlerSrcPublishTimestamp 视频原发布时间 crawlerTaskTimestamp 爬虫创建时间(可以是当前时间) videoPath 视频oss地址 coverImgPath 视频封面oss地址 title 标题 totalTime 视频时长 viewStatus 视频的有效状态 默认1 versionCode 版本 默认1 :return: """ Common.logger(log_type, crawler).info(f'publish request data: {request_data}') result = cls.request_post('https://longvideoapi.piaoquantv.com/longvideoapi/crawler/video/send', request_data) Common.logger(log_type, crawler).info(f'publish result: {result}') video_id = result["data"]["id"] Common.logger(log_type, crawler).info(f'video_id: {video_id}') if result['code'] != 0: Common.logger(log_type, crawler).error('pushlish failure msg = {}'.format(result['msg'])) else: Common.logger(log_type, crawler).info( 'publish success video_id = : {}'.format(request_data['crawlerSrcId'])) return video_id @classmethod def request_post(cls, request_url, request_data): """ post 请求 HTTP接口 :param request_url: 接口URL :param request_data: 请求参数 :return: res_data json格式 """ urllib3.disable_warnings() response = requests.post(url=request_url, data=request_data, proxies=proxies, verify=False) if response.status_code == 200: res_data = json.loads(response.text) return res_data @classmethod def bucket(cls, oss_endpoint): """ 创建 bucket :param oss_endpoint: inner:内网;out:外网;hk:香港 :return: bucket """ # 以下代码展示了基本的文件上传、下载、罗列、删除用法。 # 首先初始化AccessKeyId、AccessKeySecret、Endpoint等信息。 # 通过环境变量获取,或者把诸如“<你的AccessKeyId>”替换成真实的AccessKeyId等。 # # 以杭州区域为例,Endpoint可以是: # http://oss-cn-hangzhou.aliyuncs.com # https://oss-cn-hangzhou.aliyuncs.com # 分别以HTTP、HTTPS协议访问。 access_key_id = os.getenv('OSS_TEST_ACCESS_KEY_ID', 'LTAIP6x1l3DXfSxm') access_key_secret = os.getenv('OSS_TEST_ACCESS_KEY_SECRET', 'KbTaM9ars4OX3PMS6Xm7rtxGr1FLon') bucket_name = os.getenv('OSS_TEST_BUCKET', 'art-pubbucket') # OSS 内网 if oss_endpoint == 'inner': endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou-internal.aliyuncs.com') # OSS 外网 elif oss_endpoint == 'out': endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou.aliyuncs.com') elif oss_endpoint == 'hk': endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-accelerate.aliyuncs.com') # 默认走外网 else: endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou.aliyuncs.com') # 确认上面的参数都填写正确了 for param in (access_key_id, access_key_secret, bucket_name, endpoint): assert '<' not in param, '请设置参数:' + param # 创建Bucket对象,所有Object相关的接口都可以通过Bucket对象来进行 bucket = oss2.Bucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name) return bucket """ 处理流程: 1. 定时(每天凌晨1点执行一次)循环files文件下的内容 结构:files -> 视频文件夹 -> 视频文件 + 封面图 + 基本信息 2. 视频文件和封面上传到oss - 视频文件oss目录 longvideo/crawler_local/video/prod/文件名 - 视频封面oss目录 longvideo/crawler_local/image/prod/文件名 3. 发布视频 - 读取 基本信息 调用发布接口 """ # env 日期20220225 文件名 oss_file_path_video = 'longvideo/crawler_local/video/{}/{}/{}' oss_file_path_image = 'longvideo/crawler_local/image/{}/{}/{}' @classmethod def put_file(cls, log_type, crawler, oss_endpoint, oss_file, local_file): # cls.bucket.put_object_from_file(oss_file, local_file) cls.bucket(oss_endpoint).put_object_from_file(oss_file, local_file) Common.logger(log_type, crawler).info("put oss file = {}, local file = {} success".format(oss_file, local_file)) # 清除本地文件 @classmethod def remove_local_file(cls, log_type, crawler, local_file): os.remove(local_file) Common.logger(log_type, crawler).info("remove local file = {} success".format(local_file)) # 清除本地文件夹 @classmethod def remove_local_file_dir(cls, log_type, crawler, local_file): os.rmdir(local_file) Common.logger(log_type, crawler).info("remove local file dir = {} success".format(local_file)) # 站内 UID @classmethod def uids(cls, crawler, strategy, our_uid, env): """ 站内 ID :param crawler: 哪款爬虫 :param env: 什么环境 :param strategy: 榜单类型,也可以是指定的站内 UID :param our_uid: 上传到指定站内 UID :return: uid """ if env == 'dev': uids_dev = [6267140, 6267141] return random.choice(uids_dev) # 小年糕 elif crawler == 'xiaoniangao' and env == 'prod' and strategy == '定向爬虫策略': uids_prod_xiaoniangao_follow = [50322210, 50322211, 50322212, 50322213, 50322214, 50322215, 50322216, 50322217, 50322218, 50322219, 50322220, 50322221] return random.choice(uids_prod_xiaoniangao_follow) elif crawler == 'xiaoniangao' and env == 'prod' and strategy == '小时榜爬虫策略': uids_prod_xiaoniangao_hour = [50322226, 50322227, 50322228, 50322229] return random.choice(uids_prod_xiaoniangao_hour) elif crawler == 'xiaoniangao' and env == 'prod' and strategy == '播放量榜爬虫策略': uids_prod_xiaoniangao_play = [50322222, 50322223, 50322224, 50322225] return random.choice(uids_prod_xiaoniangao_play) elif crawler == 'gongzhonghao' and env == 'prod' and strategy == '定向爬虫策略': uids_prod_gongzhonghao_follow = [26117675, 26117676, 26117677, 26117678, 26117679, 26117680] return random.choice(uids_prod_gongzhonghao_follow) elif crawler == 'kanyikan': uids_prod_kanyikan_moment = [20631208, 20631209, 20631210, 20631211, 20631212, 20631213, 20631214, 20631215, 20631216, 20631217, 20631223, 20631224, 20631225, 20631226, 20631227] return random.choice(uids_prod_kanyikan_moment) elif crawler == 'ggdc' and env == 'prod' and strategy == 'kanyikan_recommend': uids_ggdc_prod_recommend = [26117661, 26117662, 26117663] return random.choice(uids_ggdc_prod_recommend) elif crawler == 'ggdc' and env == 'prod' and strategy == 'follow': uids_ggdc_prod_follow = [26117661, 26117662, 26117663] return random.choice(uids_ggdc_prod_follow) else: return our_uid # 爬虫渠道号 @classmethod def crawlersrccode(cls, crawler): if crawler == 'youtube': return 'YOUTUBE' elif crawler == 'kanyikan': return 'KANYIKAN' elif crawler == "kuaishou": return "KUAISHOU_XCX" elif crawler == "weishi": return "WEISHI" elif crawler == "xiaoniangao": return "XIAONIANGAO_XCX" elif crawler == "benshanzhufu": return "BENSHANZHUFU" elif crawler == "gongzhonghao_xinxin": return "GONGZHONGHAO_XINXIN" elif crawler == 'shipinhao': return 'SHIPINHAO_XCX' elif crawler == 'xigua': return 'XIGUA' elif crawler == 'zhihu': return 'ZHIHU' elif crawler == 'jixiangxingfu': return 'JIXIANGXINGFU' elif crawler == 'zhongmiaoyinxin': return 'ZHONGMIAOYINXIN' elif crawler == 'suisuiniannianyingfuqi': return 'SUISUINIANNIANYINGFUQI' elif crawler == 'zhufumao': return 'ZHUFUMAO' elif crawler == 'zongjiao': return 'ZONGJIAO' elif crawler == 'haokan': return 'HAOKAN' elif crawler == 'kandaojiushifuqi': return 'KANDAOJIUSHIFUQI' elif crawler == 'shengshengyingyin': return 'SHENGSHENGYINGYIN' elif crawler == 'ganggangdouchuan': return 'GANGGANGDOUCHUAN' elif crawler == 'gongzhonghao_xinxin': return 'GONGZHONGHAO_XINXIN' elif crawler == 'weixinzhishu': return 'WEIXINZHISHU' else: return "CRAWLER" @classmethod def local_file_path(cls, crawler): local_file_path = f'./{crawler}/videos' video_file = 'video' image_file = 'image' info_file = 'info' loacl_file_dict = { 'local_file_path': local_file_path, 'video_file': video_file, 'image_file': image_file, 'info_file': info_file} return loacl_file_dict @classmethod def upload_and_publish(cls, log_type, crawler, strategy, our_uid, env, oss_endpoint): """ 上传视频到 oss :param log_type: 选择的 log :param crawler: 哪款爬虫 :param env: 测试环境:dev,正式环境:prod :param our_uid: 站内 UID :param strategy: 榜单类型 :param oss_endpoint: 内网:inner;外网:out """ Common.logger(log_type, crawler).info("upload_and_publish starting...") today = time.strftime("%Y%m%d", time.localtime()) # videos 目录下的所有视频文件夹 files = os.listdir(cls.local_file_path(crawler)["local_file_path"]) for fv in files: try: # 单个视频文件夹 fi_d = os.path.join(cls.local_file_path(crawler)["local_file_path"], fv) # 确认为视频文件夹 if os.path.isdir(fi_d): Common.logger(log_type, crawler).info('dir = {}'.format(fi_d)) # 列出所有视频文件夹 dir_files = os.listdir(fi_d) data = {'appType': '888888', 'crawlerSrcCode': cls.crawlersrccode(crawler), 'viewStatus': '1', 'versionCode': '1'} now_timestamp = int(round(time.time() * 1000)) data['crawlerTaskTimestamp'] = str(now_timestamp) data['loginUid'] = cls.uids(crawler, strategy, our_uid, env) # 单个视频文件夹下的所有视频文件 for fi in dir_files: # 视频文件夹下的所有文件路径 fi_path = fi_d + '/' + fi Common.logger(log_type, crawler).info('dir fi_path = {}'.format(fi_path)) # 读取 info.txt,赋值给 data if cls.local_file_path(crawler)["info_file"] in fi: f = open(fi_path, "r", encoding="UTF-8") # 读取数据 数据准确性写入的时候保证 读取暂不处理 for i in range(14): line = f.readline() line = line.replace('\n', '') if line is not None and len(line) != 0 and not line.isspace(): # Common.logger(log_type, crawler).info("line = {}".format(line)) if i == 0: data['crawlerSrcId'] = line elif i == 1: data['title'] = line elif i == 2: data['totalTime'] = line elif i == 8: data['crawlerSrcPublishTimestamp'] = line else: Common.logger(log_type, crawler).warning("{} line is None".format(fi_path)) f.close() # remove info.txt cls.remove_local_file(log_type, crawler, fi_path) # 刷新数据 dir_files = os.listdir(fi_d) for fi in dir_files: fi_path = fi_d + '/' + fi # Common.logger(log_type, crawler).info('dir fi_path = {}'.format(fi_path)) # 上传oss if cls.local_file_path(crawler)["video_file"] in fi: global oss_video_file if env == "dev": oss_video_file = cls.oss_file_path_video.format("dev", today, data['crawlerSrcId']) elif env == "prod": oss_video_file = cls.oss_file_path_video.format("prod", today, data['crawlerSrcId']) elif env == "hk": oss_video_file = cls.oss_file_path_video.format("prod", today, data['crawlerSrcId']) Common.logger(log_type, crawler).info("oss_video_file = {}".format(oss_video_file)) cls.put_file(log_type, crawler, oss_endpoint, oss_video_file, fi_path) data['videoPath'] = oss_video_file Common.logger(log_type, crawler).info("videoPath = {}".format(oss_video_file)) elif cls.local_file_path(crawler)["image_file"] in fi: global oss_image_file if env == "dev": oss_image_file = cls.oss_file_path_image.format("env", today, data['crawlerSrcId']) elif env == "prod": oss_image_file = cls.oss_file_path_image.format("prod", today, data['crawlerSrcId']) elif env == "hk": oss_image_file = cls.oss_file_path_image.format("prod", today, data['crawlerSrcId']) Common.logger(log_type, crawler).info("oss_image_file = {}".format(oss_image_file)) cls.put_file(log_type, crawler, oss_endpoint, oss_image_file, fi_path) data['coverImgPath'] = oss_image_file Common.logger(log_type, crawler).info("coverImgPath = {}".format(oss_image_file)) # 全部remove cls.remove_local_file(log_type, crawler, fi_path) # 发布 if env == "dev": video_id = cls.publish_video_dev(log_type, crawler, data) elif env == "prod": video_id = cls.publish_video_prod(log_type, crawler, data) elif env == "hk": video_id = cls.publish_video_prod(log_type, crawler, data) else: video_id = cls.publish_video_dev(log_type, crawler, data) cls.remove_local_file_dir(log_type, crawler, fi_d) Common.logger(log_type, crawler).info('video_id:{}', video_id) return video_id else: Common.logger(log_type, crawler).error('file not a dir = {}'.format(fi_d)) except Exception as e: # 删除视频文件夹 shutil.rmtree(f"./{crawler}/videos/{fv}/") Common.logger(log_type, crawler).exception('upload_and_publish error', e)