insert_video.py

# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/3/28
import json
import os
import sys

sys.path.append(os.getcwd())

from common.common import Common
from common.scheduling_db import MysqlHelper
from common.feishu import Feishu


class Insert:
    @classmethod
    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env, machine):
        gongzhonghao_sheetid_list = ['47e39d']
        for sheetid in gongzhonghao_sheetid_list:
            gongzhonghao_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
            # Row 0 is the header row, so data rows start at index 1.
            for i in range(1, len(gongzhonghao_sheet)):
                # Skip rows whose required cells are empty.
                if gongzhonghao_sheet[i][5] is None or gongzhonghao_sheet[i][9] is None:
                    continue
                # Derive the numeric video id from the admin post-detail URL.
                video_id = gongzhonghao_sheet[i][9].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace("/info", "")
                if video_id == "None":
                    continue
                video_id = int(video_id)
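                # An equivalent, stricter parse (a sketch, kept as a comment so the
                # original behavior is unchanged; it assumes the URL always has the
                # shape ".../post-detail/<digits>/info"):
                #   m = re.search(r"/post-detail/(\d+)/info", gongzhonghao_sheet[i][9])
                #   if m is None:
                #       continue
                #   video_id = int(m.group(1))
                # (would additionally require `import re` at the top of the file)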
                out_user_id = str(gongzhonghao_sheet[i][14])
                platform = "公众号"  # stored value ("WeChat official account"); must match rows already in crawler_video
                strategy = "定向爬虫策略"  # stored value ("targeted crawler strategy"); kept verbatim for the same reason
                out_video_id = str(gongzhonghao_sheet[i][8])
                video_title = str(gongzhonghao_sheet[i][7])
                cover_url = str(gongzhonghao_sheet[i][16])
                video_url = str(gongzhonghao_sheet[i][18])
                duration = int(gongzhonghao_sheet[i][10])
                publish_time = str(gongzhonghao_sheet[i][12]).replace("/", "-")
                crawler_rule = json.dumps({"play_cnt": {"min": 0}, "duration": {"min": 20}, "publish_day": {"min": 0}})
                # The resolution cell is stored as "width*height".
                width = int(gongzhonghao_sheet[i][11].split("*")[0])
                height = int(gongzhonghao_sheet[i][11].split("*")[1])
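                # Hardening sketch (comment only; that the sheet can yield malformed
                # resolution cells is an assumption, not observed behavior):
                #   try:
                #       width, height = (int(x) for x in gongzhonghao_sheet[i][11].split("*"))
                #   except (ValueError, AttributeError):
                #       continue  # skip rows with a malformed resolution cell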
                # Debug hook: print each parsed field and its type here when diagnosing bad sheet rows.
                # Deduplicate on (platform, out_video_id) before inserting.
                select_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}" """
                Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env, machine)
                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
                if repeat_video is not None and len(repeat_video) != 0:
                    Common.logger(log_type, crawler).info(f"{video_title} already exists in the database\n")
                else:
                    # Save the video record to the database. Values are interpolated
                    # directly into the SQL string; crawler_rule is wrapped in single
                    # quotes because the JSON itself contains double quotes.
                    insert_sql = f""" insert into crawler_video(video_id,
                                                                out_user_id,
                                                                platform,
                                                                strategy,
                                                                out_video_id,
                                                                video_title,
                                                                cover_url,
                                                                video_url,
                                                                duration,
                                                                publish_time,
                                                                crawler_rule,
                                                                width,
                                                                height)
                                                                values({video_id},
                                                                "{out_user_id}",
                                                                "{platform}",
                                                                "{strategy}",
                                                                "{out_video_id}",
                                                                "{video_title}",
                                                                "{cover_url}",
                                                                "{video_url}",
                                                                {duration},
                                                                "{publish_time}",
                                                                '{crawler_rule}',
                                                                {width},
                                                                {height}) """
                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
                    Common.logger(log_type, crawler).info('Video info inserted into the database!\n')
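

# A minimal sketch, not part of the original pipeline and never called: the same
# insert written with parameter binding, which avoids the quoting problems of
# string-built SQL (e.g. titles containing quotes). It assumes direct access to
# a pymysql connection; the function name and all connection arguments below are
# placeholders, since MysqlHelper's internals are not shown in this file.
def insert_video_parameterized_sketch(row):
    import pymysql  # imported lazily so the module still loads without pymysql installed
    connection = pymysql.connect(host="127.0.0.1",  # placeholder host
                                 user="crawler",  # placeholder user
                                 password="********",  # placeholder password
                                 database="piaoquan_crawler")  # placeholder schema name
    insert_sql = """insert into crawler_video(video_id, out_user_id, platform,
                    strategy, out_video_id, video_title, cover_url, video_url,
                    duration, publish_time, crawler_rule, width, height)
                    values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    try:
        with connection.cursor() as cursor:
            # `row` is a 13-tuple in the column order above; the driver escapes
            # every value, so quotes in titles cannot break the statement.
            cursor.execute(insert_sql, row)
        connection.commit()
    finally:
        connection.close()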


if __name__ == "__main__":
    Insert.insert_video_from_feishu_to_mysql("insert-prod", "gongzhonghao", "prod", "local")