Server
/
piaoquan_crawler


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
							# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/4/13
import json
import os
import sys
import time
from datetime import date, timedelta
sys.path.append(os.getcwd())
from common.common import Common
from common.feishu import Feishu
from common.scheduling_db import MysqlHelper


class Insert:
    @classmethod
    def get_config(cls, log_type, crawler, text, env):
        select_sql = f"""select * from crawler_config where source="benshanzhufu" """
        contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
        title_list = []
        filter_list = []
        for content in contents:
            config = content['config']
            config_dict = eval(config)
            for k, v in config_dict.items():
                if k == "title":
                    title_list_config = v.split(",")
                    for title in title_list_config:
                        title_list.append(title)
                if k == "filter":
                    filter_list_config = v.split(",")
                    for filter_word in filter_list_config:
                        filter_list.append(filter_word)
        if text == "title":
            return title_list
        elif text == "filter":
            return filter_list

    @classmethod
    def before_day(cls):
        publish_time_str_rule = (date.today() + timedelta(days=-30)).strftime("%Y-%m-%d %H:%M:%S")
        publish_time_stamp_rule = int(time.mktime(time.strptime(publish_time_str_rule, "%Y-%m-%d %H:%M:%S")))
        print(publish_time_str_rule)
        print(publish_time_stamp_rule)

    @classmethod
    def insert_config(cls, log_type, crawler, env):
        filter_sheet = Feishu.get_values_batch(log_type, crawler, "DjXfqG")
        # title_sheet = Feishu.get_values_batch(log_type, crawler, "bHSW1p")
        filter_list = []
        # title_list = []
        for x in filter_sheet:
            for y in x:
                if y is None:
                    pass
                else:
                    filter_list.append(y)
        # for x in title_sheet:
        #     for y in x:
        #         if y is None:
        #             pass
        #         else:
        #             title_list.append(y)
        # str_title = ','.join(title_list)
        str_filter = ','.join(filter_list)
        config_dict = {
            # "title": str_title,
            "filter": str_filter
        }
        str_config_dict = str(config_dict)
        # print(f"config_dict:{config_dict}")
        # print(f"str_config_dict:{str_config_dict}")
        insert_sql = f""" insert into crawler_config(title, source, config) values("本山祝福小程序", "benshanzhufu", "{str_config_dict}") """
        MysqlHelper.update_values(log_type, crawler, insert_sql, env)

    @classmethod
    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env):
        benshanzhufu_sheetid = ['440018']
        for sheetid in benshanzhufu_sheetid:
            xiaoniangao_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
            for i in range(1, len(xiaoniangao_sheet)):
            # for i in range(1, 3):
                if xiaoniangao_sheet[i][5] is None or xiaoniangao_sheet[i][9] is None:
                    continue
                video_id = xiaoniangao_sheet[i][8].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace(
                    "/info", "")
                if video_id == "None":
                    continue
                video_id = int(video_id)
                out_user_id = str(xiaoniangao_sheet[i][17])
                platform = "本山祝福"
                strategy = "推荐榜爬虫策略"
                out_video_id = str(xiaoniangao_sheet[i][6])
                video_title = str(xiaoniangao_sheet[i][7])
                cover_url = str(xiaoniangao_sheet[i][19])
                video_url = str(xiaoniangao_sheet[i][20])
                duration = int(xiaoniangao_sheet[i][13])
                publish_time = str(xiaoniangao_sheet[i][15]).replace("/", "-")
                play_cnt = int(xiaoniangao_sheet[i][9])
                like_cnt = int(xiaoniangao_sheet[i][11])
                share_cnt = int(xiaoniangao_sheet[i][12])
                # collection_cnt = 0
                comment_cnt = int(xiaoniangao_sheet[i][10])
                user_id = str(xiaoniangao_sheet[i][17])
                crawler_rule = json.dumps({})
                width = int(xiaoniangao_sheet[i][14].split("*")[0])
                height = int(xiaoniangao_sheet[i][14].split("*")[1])

                # print(f"video_id:{video_id}, type:{type(video_id)}")
                # print(f"user_id:{user_id}, type:{type(user_id)}")
                # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}")
                # print(f"platform:{platform}, type:{type(platform)}")
                # print(f"strategy:{strategy}, type:{type(strategy)}")
                # print(f"out_video_id:{out_video_id}, type:{type(out_video_id)}")
                # print(f"video_title:{video_title}, type:{type(video_title)}")
                # print(f"cover_url:{cover_url}, type:{type(cover_url)}")
                # print(f"video_url:{video_url}, type:{type(video_url)}")
                # print(f"duration:{duration}, type:{type(duration)}")
                # print(f"publish_time:{publish_time}, type:{type(publish_time)}")
                # print(f"play_cnt:{play_cnt}, type:{type(play_cnt)}")
                # print(f"like_cnt:{like_cnt}, type:{type(like_cnt)}")
                # print(f"share_cnt:{share_cnt}, type:{type(share_cnt)}")
                # print(f"comment_cnt:{comment_cnt}, type:{type(comment_cnt)}")
                # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}")
                # print(f"width:{width}, type:{type(width)}")
                # print(f"height:{height}, type:{type(height)}\n")

                select_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}" """
                Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env)
                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")

                if repeat_video is not None and len(repeat_video) != 0:
                    Common.logger(log_type, crawler).info(f"{video_title} 已存在数据库中\n")
                else:
                    # 视频信息保存数据库
                    insert_sql = f""" insert into crawler_video(video_id,
                                        out_user_id,
                                        platform,
                                        strategy,
                                        out_video_id,
                                        video_title,
                                        cover_url,
                                        video_url,
                                        duration,
                                        publish_time,
                                        play_cnt,
                                        like_cnt,
                                        share_cnt,
                                        comment_cnt,
                                        crawler_rule,
                                        width,
                                        height)
                                        values({video_id},
                                        "{out_user_id}",
                                        "{platform}",
                                        "{strategy}",
                                        "{out_video_id}",
                                        "{video_title}",
                                        "{cover_url}",
                                        "{video_url}",
                                        {duration},
                                        "{publish_time}",
                                        {play_cnt},
                                        {like_cnt},
                                        {share_cnt},
                                        {comment_cnt},
                                        '{crawler_rule}',
                                        {width},
                                        {height}) """
                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
                    Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')


if __name__ == "__main__":
    # Insert.insert_config("insert", "benshanzhufu", "dev")
    # print(Insert.get_config("insert", "benshanzhufu", "filter", "dev"))
    Insert.insert_video_from_feishu_to_mysql("insert-prod", "benshanzhufu", "prod")
    pass