|  | @@ -0,0 +1,117 @@
 | 
	
		
			
				|  |  | +# -*- coding: utf-8 -*-
 | 
	
		
			
				|  |  | +# @Author: wangkun
 | 
	
		
			
				|  |  | +# @Time: 2023/3/14
 | 
	
		
			
				|  |  | +import json
 | 
	
		
			
				|  |  | +import os
 | 
	
		
			
				|  |  | +import sys
 | 
	
		
			
				|  |  | +sys.path.append(os.getcwd())
 | 
	
		
			
				|  |  | +from common.common import Common
 | 
	
		
			
				|  |  | +from common.scheduling_db  import MysqlHelper
 | 
	
		
			
				|  |  | +from common.feishu import Feishu
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +class Insert:
 | 
	
		
			
				|  |  | +    @classmethod
 | 
	
		
			
				|  |  | +    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env, machine):
 | 
	
		
			
				|  |  | +        xiaoniangao_sheetid_list = ['yatRv2']
 | 
	
		
			
				|  |  | +        for sheetid in xiaoniangao_sheetid_list:
 | 
	
		
			
				|  |  | +            xiaoniangao_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
 | 
	
		
			
				|  |  | +            for i in range(1, len(xiaoniangao_sheet)):
 | 
	
		
			
				|  |  | +            # for i in range(1, 3):
 | 
	
		
			
				|  |  | +                if xiaoniangao_sheet[i][5] is None or xiaoniangao_sheet[i][9] is None:
 | 
	
		
			
				|  |  | +                    continue
 | 
	
		
			
				|  |  | +                video_id = xiaoniangao_sheet[i][9].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace("/info", "")
 | 
	
		
			
				|  |  | +                if video_id == "None":
 | 
	
		
			
				|  |  | +                    continue
 | 
	
		
			
				|  |  | +                video_id = int(video_id)
 | 
	
		
			
				|  |  | +                out_user_id = str(xiaoniangao_sheet[i][19])
 | 
	
		
			
				|  |  | +                platform = "小年糕"
 | 
	
		
			
				|  |  | +                strategy = "定向爬虫策略"
 | 
	
		
			
				|  |  | +                out_video_id = str(xiaoniangao_sheet[i][7])
 | 
	
		
			
				|  |  | +                video_title = str(xiaoniangao_sheet[i][8])
 | 
	
		
			
				|  |  | +                cover_url = str(xiaoniangao_sheet[i][21])
 | 
	
		
			
				|  |  | +                video_url = str(xiaoniangao_sheet[i][22])
 | 
	
		
			
				|  |  | +                duration = int(xiaoniangao_sheet[i][14])
 | 
	
		
			
				|  |  | +                publish_time = str(xiaoniangao_sheet[i][16]).replace("/", "-")
 | 
	
		
			
				|  |  | +                play_cnt = int(xiaoniangao_sheet[i][10])
 | 
	
		
			
				|  |  | +                like_cnt = int(xiaoniangao_sheet[i][12])
 | 
	
		
			
				|  |  | +                share_cnt = int(xiaoniangao_sheet[i][13])
 | 
	
		
			
				|  |  | +                # collection_cnt = 0
 | 
	
		
			
				|  |  | +                comment_cnt = int(xiaoniangao_sheet[i][11])
 | 
	
		
			
				|  |  | +                crawler_rule = json.dumps({"play_cnt": {"min": 500}, "duration": {"min": 40}, "publish_day": {"min": 3}})
 | 
	
		
			
				|  |  | +                width = int(xiaoniangao_sheet[i][15].split("*")[0])
 | 
	
		
			
				|  |  | +                height = int(xiaoniangao_sheet[i][15].split("*")[1])
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                # print(f"video_id:{video_id}, type:{type(video_id)}")
 | 
	
		
			
				|  |  | +                # print(f"user_id:{user_id}, type:{type(user_id)}")
 | 
	
		
			
				|  |  | +                # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}")
 | 
	
		
			
				|  |  | +                # print(f"platform:{platform}, type:{type(platform)}")
 | 
	
		
			
				|  |  | +                # print(f"strategy:{strategy}, type:{type(strategy)}")
 | 
	
		
			
				|  |  | +                # print(f"out_video_id:{out_video_id}, type:{type(out_video_id)}")
 | 
	
		
			
				|  |  | +                # print(f"video_title:{video_title}, type:{type(video_title)}")
 | 
	
		
			
				|  |  | +                # print(f"cover_url:{cover_url}, type:{type(cover_url)}")
 | 
	
		
			
				|  |  | +                # print(f"video_url:{video_url}, type:{type(video_url)}")
 | 
	
		
			
				|  |  | +                # print(f"duration:{duration}, type:{type(duration)}")
 | 
	
		
			
				|  |  | +                # print(f"publish_time:{publish_time}, type:{type(publish_time)}")
 | 
	
		
			
				|  |  | +                # print(f"play_cnt:{play_cnt}, type:{type(play_cnt)}")
 | 
	
		
			
				|  |  | +                # print(f"like_cnt:{like_cnt}, type:{type(like_cnt)}")
 | 
	
		
			
				|  |  | +                # print(f"share_cnt:{share_cnt}, type:{type(share_cnt)}")
 | 
	
		
			
				|  |  | +                # print(f"comment_cnt:{comment_cnt}, type:{type(comment_cnt)}")
 | 
	
		
			
				|  |  | +                # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}")
 | 
	
		
			
				|  |  | +                # print(f"width:{width}, type:{type(width)}")
 | 
	
		
			
				|  |  | +                # print(f"height:{height}, type:{type(height)}\n")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                select_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}" """
 | 
	
		
			
				|  |  | +                Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
 | 
	
		
			
				|  |  | +                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env, machine)
 | 
	
		
			
				|  |  | +                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                if repeat_video is not None and len(repeat_video) != 0:
 | 
	
		
			
				|  |  | +                    Common.logger(log_type, crawler).info(f"{video_title} 已存在数据库中\n")
 | 
	
		
			
				|  |  | +                else:
 | 
	
		
			
				|  |  | +                    # 视频信息保存数据库
 | 
	
		
			
				|  |  | +                    insert_sql = f""" insert into crawler_video(video_id,
 | 
	
		
			
				|  |  | +                                    out_user_id,
 | 
	
		
			
				|  |  | +                                    platform,
 | 
	
		
			
				|  |  | +                                    strategy,
 | 
	
		
			
				|  |  | +                                    out_video_id,
 | 
	
		
			
				|  |  | +                                    video_title,
 | 
	
		
			
				|  |  | +                                    cover_url,
 | 
	
		
			
				|  |  | +                                    video_url,
 | 
	
		
			
				|  |  | +                                    duration,
 | 
	
		
			
				|  |  | +                                    publish_time,
 | 
	
		
			
				|  |  | +                                    play_cnt,
 | 
	
		
			
				|  |  | +                                    like_cnt,
 | 
	
		
			
				|  |  | +                                    share_cnt,
 | 
	
		
			
				|  |  | +                                    comment_cnt,
 | 
	
		
			
				|  |  | +                                    crawler_rule,
 | 
	
		
			
				|  |  | +                                    width,
 | 
	
		
			
				|  |  | +                                    height)
 | 
	
		
			
				|  |  | +                                    values({video_id},
 | 
	
		
			
				|  |  | +                                    "{out_user_id}",
 | 
	
		
			
				|  |  | +                                    "{platform}",
 | 
	
		
			
				|  |  | +                                    "{strategy}",
 | 
	
		
			
				|  |  | +                                    "{out_video_id}",
 | 
	
		
			
				|  |  | +                                    "{video_title}",
 | 
	
		
			
				|  |  | +                                    "{cover_url}",
 | 
	
		
			
				|  |  | +                                    "{video_url}",
 | 
	
		
			
				|  |  | +                                    {duration},
 | 
	
		
			
				|  |  | +                                    "{publish_time}",
 | 
	
		
			
				|  |  | +                                    {play_cnt},
 | 
	
		
			
				|  |  | +                                    {like_cnt},
 | 
	
		
			
				|  |  | +                                    {share_cnt},
 | 
	
		
			
				|  |  | +                                    {comment_cnt},
 | 
	
		
			
				|  |  | +                                    '{crawler_rule}',
 | 
	
		
			
				|  |  | +                                    {width},
 | 
	
		
			
				|  |  | +                                    {height}) """
 | 
	
		
			
				|  |  | +                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
 | 
	
		
			
				|  |  | +                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
 | 
	
		
			
				|  |  | +                    Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +if __name__ == "__main__":
 | 
	
		
			
				|  |  | +    # Insert.insert_video_from_feishu_to_mysql("insert-dev-follow", "xiaoniangao", "dev", "local")
 | 
	
		
			
				|  |  | +    # Insert.insert_video_from_feishu_to_mysql("insert-dev-hour", "xiaoniangao", "dev", "local")
 | 
	
		
			
				|  |  | +    # Insert.insert_video_from_feishu_to_mysql("insert-prod-follow", "xiaoniangao", "prod", "local")
 | 
	
		
			
				|  |  | +    Insert.insert_video_from_feishu_to_mysql("insert-prod-2", "xiaoniangao", "prod", "local")
 | 
	
		
			
				|  |  | +    pass
 |