# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/3/28
import json
import os
import sys
sys.path.append(os.getcwd())
from common.common import Common
from common.scheduling_db import MysqlHelper
from common.feishu import Feishu


class Insert:
    @classmethod
    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env, machine):
        gongzhonghao_sheetid_list = ['47e39d']
        for sheetid in gongzhonghao_sheetid_list:
            gongzhonghao_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
            # Skip the header row and walk every data row in the sheet
            for i in range(1, len(gongzhonghao_sheet)):
                # Skip rows that are missing required columns
                if gongzhonghao_sheet[i][5] is None or gongzhonghao_sheet[i][9] is None:
                    continue
                # Extract the numeric video id from the admin post-detail URL
                video_id = gongzhonghao_sheet[i][9].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace("/info", "")
                if video_id == "None":
                    continue
                video_id = int(video_id)
                out_user_id = str(gongzhonghao_sheet[i][14])
                platform = "公众号"  # WeChat Official Accounts; stored verbatim in the DB
                strategy = "定向爬虫策略"  # targeted crawler strategy; stored verbatim in the DB
                out_video_id = str(gongzhonghao_sheet[i][8])
                video_title = str(gongzhonghao_sheet[i][7])
                cover_url = str(gongzhonghao_sheet[i][16])
                video_url = str(gongzhonghao_sheet[i][18])
                duration = int(gongzhonghao_sheet[i][10])
                # Normalize "2023/03/28"-style dates to "2023-03-28"
                publish_time = str(gongzhonghao_sheet[i][12]).replace("/", "-")
                crawler_rule = json.dumps({"play_cnt": {"min": 0}, "duration": {"min": 20}, "publish_day": {"min": 0}})
                # Resolution is stored in the sheet as "width*height"
                width = int(gongzhonghao_sheet[i][11].split("*")[0])
                height = int(gongzhonghao_sheet[i][11].split("*")[1])

                # Skip videos that are already in the database
                select_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}" """
                Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env, machine)
                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
                if repeat_video is not None and len(repeat_video) != 0:
                    Common.logger(log_type, crawler).info(f"{video_title} already exists in the database\n")
                else:
                    # Save the video info to the database
                    insert_sql = f""" insert into crawler_video(video_id, out_user_id, platform, strategy, out_video_id,
                                                                video_title, cover_url, video_url, duration, publish_time,
                                                                crawler_rule, width, height)
                                      values({video_id}, "{out_user_id}", "{platform}", "{strategy}", "{out_video_id}",
                                             "{video_title}", "{cover_url}", "{video_url}", {duration}, "{publish_time}",
                                             '{crawler_rule}', {width}, {height}) """
                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
                    Common.logger(log_type, crawler).info('Video info inserted into the database!\n')


if __name__ == "__main__":
    Insert.insert_video_from_feishu_to_mysql("insert-prod", "gongzhonghao", "prod", "local")
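

# Note: the f-string SQL above breaks (and is injectable) when a value such as
# video_title contains quote characters. Below is a minimal sketch of a
# parameterized alternative, assuming a DB-API connection (e.g. pymysql);
# MysqlHelper's internals are not shown in this file, so the connection
# handling here is a hypothetical illustration, not the project's actual API.
def insert_video_row_parameterized(connection, row):
    """Insert one crawler_video row with %s placeholders; the driver escapes values."""
    sql = """
        insert into crawler_video(video_id, out_user_id, platform, strategy, out_video_id,
                                  video_title, cover_url, video_url, duration, publish_time,
                                  crawler_rule, width, height)
        values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    with connection.cursor() as cursor:
        # Each bound value is escaped by the driver, so quotes in titles are safe
        cursor.execute(sql, (row["video_id"], row["out_user_id"], row["platform"],
                             row["strategy"], row["out_video_id"], row["video_title"],
                             row["cover_url"], row["video_url"], row["duration"],
                             row["publish_time"], row["crawler_rule"], row["width"],
                             row["height"]))
    connection.commit()
# Usage sketch (hypothetical): connection = pymysql.connect(...), then
# insert_video_row_parameterized(connection, row_dict) per deduplicated row.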