# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/2/27
"""One-off maintenance helpers for the Kuaishou crawler.

Contains ad-hoc classmethods to migrate video rows from Feishu sheets into
the ``crawler_video`` MySQL table, and to generate random out_uid values and
Kuaishou web cookie strings for manual testing.
"""
import json
import os
import random
import string
import sys
import time

sys.path.append(os.getcwd())
from common.common import Common
from common.db import MysqlHelper
from common.feishu import Feishu


class Insert:
    @classmethod
    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env, machine):
        """Sync Kuaishou video rows from Feishu sheets into ``crawler_video``.

        For each data row: skip it when the same (platform, out_video_id)
        already exists; update the existing row when only the out_video_id
        matches; otherwise insert a new row.

        :param log_type: logger channel name passed through to Common/Mysql helpers
        :param crawler: crawler name passed through to Common/Mysql helpers
        :param env: environment selector forwarded to MysqlHelper
        :param machine: machine selector forwarded to MysqlHelper
        """
        kuaishou_sheetid_list = ["fYdA8F", "3cd128", "31kOdu"]
        for sheetid in kuaishou_sheetid_list:
            kuaishou_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
            # Row 0 is the sheet header, so data starts at index 1.
            for i in range(1, len(kuaishou_sheet)):
                row = kuaishou_sheet[i]
                if row[5] is None:
                    continue
                if row[9] is None:
                    # No admin post link yet: fall back to a timestamp-based id.
                    video_id = int(time.time())
                else:
                    video_id = row[9].replace(
                        "https://admin.piaoquantv.com/cms/post-detail/", ""
                    ).replace("/info", "")
                    if video_id == "None":
                        continue
                    video_id = int(video_id)
                user_id = 0
                out_user_id = str(row[18])
                platform = "快手"
                strategy = "定向爬虫策略"
                out_video_id = str(row[7])
                video_title = str(row[8])
                cover_url = str(row[20])
                video_url = str(row[21])
                duration = int(row[14])
                # Sheet dates use "/" separators; MySQL expects "-".
                publish_time = str(row[16]).replace("/", "-")
                play_cnt = int(row[10])
                like_cnt = int(row[12])
                share_cnt = int(row[13])
                comment_cnt = int(row[11])
                crawler_rule = json.dumps(
                    {"play_cnt": 5000, "comment_cnt": 0, "like_cnt": 5000,
                     "share_cnt": 1000, "duration": 40, "publish_time": 7,
                     "video_width": 0, "video_height": 0})
                # Resolution column is stored as "width*height".
                width = int(row[15].split("*")[0])
                height = int(row[15].split("*")[1])

                # NOTE(review): SQL below is built by f-string interpolation of
                # sheet values — a title containing a double quote breaks the
                # statement and this is injectable. Switch to parameterized
                # queries if MysqlHelper supports them.
                select_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}" """
                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env, machine)
                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")

                repeat_video_id_sql = f""" select * from crawler_video where out_video_id="{out_video_id}" """
                repeat_video_id = MysqlHelper.get_values(log_type, crawler, repeat_video_id_sql, env, machine)
                Common.logger(log_type, crawler).info(f"repeat_video_id:{repeat_video_id}")

                if repeat_video is not None and len(repeat_video) != 0:
                    # Exact (platform, out_video_id) match: nothing to do.
                    Common.logger(log_type, crawler).info(f"{video_title} 已存在数据库中\n")
                elif repeat_video_id is not None and len(repeat_video_id) != 0:
                    # Same out_video_id under another platform/strategy: refresh it.
                    Common.logger(log_type, crawler).info(f"开始更新视频信息\n")
                    update_sql = f""" UPDATE crawler_video SET
                                    user_id={user_id},
                                    out_user_id="{out_user_id}",
                                    platform="{platform}",
                                    strategy="{strategy}",
                                    out_video_id="{out_video_id}",
                                    video_title="{video_title}",
                                    cover_url="{cover_url}",
                                    video_url="{video_url}",
                                    duration={duration},
                                    publish_time="{publish_time}",
                                    play_cnt={play_cnt},
                                    like_cnt={like_cnt},
                                    share_cnt={share_cnt},
                                    comment_cnt={comment_cnt},
                                    crawler_rule='{crawler_rule}',
                                    width={width},
                                    height={height}
                                    WHERE video_id={video_id}
                                    """
                    Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                    MysqlHelper.update_values(log_type, crawler, update_sql, env, machine)
                    Common.logger(log_type, crawler).info('视频信息更新成功!\n')
                else:
                    # Brand-new video: insert a full row.
                    insert_sql = f""" insert into crawler_video(video_id,
                                    user_id,
                                    out_user_id,
                                    platform,
                                    strategy,
                                    out_video_id,
                                    video_title,
                                    cover_url,
                                    video_url,
                                    duration,
                                    publish_time,
                                    play_cnt,
                                    like_cnt,
                                    share_cnt,
                                    comment_cnt,
                                    crawler_rule,
                                    width,
                                    height)
                                    values({video_id},
                                    {user_id},
                                    "{out_user_id}",
                                    "{platform}",
                                    "{strategy}",
                                    "{out_video_id}",
                                    "{video_title}",
                                    "{cover_url}",
                                    "{video_url}",
                                    {duration},
                                    "{publish_time}",
                                    {play_cnt},
                                    {like_cnt},
                                    {share_cnt},
                                    {comment_cnt},
                                    '{crawler_rule}',
                                    {width},
                                    {height}) """
                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
                    Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')

    @classmethod
    def get_sheet(cls):
        """Fetch and print one Kuaishou Feishu sheet (manual debugging aid)."""
        sheet = Feishu.get_values_batch("insert", "kuaishou", "fYdA8F")
        print(sheet)

    @classmethod
    def random_out_uid(cls):
        """Print a random 10-digit userId and a random 10-digit out_uid.

        Manual helper for generating fake Kuaishou identifiers; output goes
        to stdout only.
        """
        # 10 digits drawn independently, each from 1-9 (never 0).
        user_id = ''.join(str(random.choice(range(1, 10))) for _ in range(10))
        print(type(user_id))
        print(user_id)
        # 10 distinct digits sampled without replacement from 0-9.
        out_uid = ''.join(random.sample(string.digits, 10))
        print(type(out_uid))
        print(out_uid)

    @classmethod
    def random_cookies(cls):
        """Print a randomly generated kuaishou.server.web.st / .ph cookie pair.

        The segment lengths and '_'/'-' separators mimic the shape of real
        Kuaishou web cookies; the content is random and not a valid session.
        """
        alphanum = string.ascii_letters + string.digits
        st_segments = [
            ''.join(random.sample(alphanum, 53)),
            ''.join(random.sample(alphanum, 58)) + ''.join(random.sample(alphanum, 20)),
            ''.join(random.sample(alphanum, 37)),
            ''.join(random.sample(alphanum, 4)),
            ''.join(random.sample(alphanum, 56)) + ''.join(random.sample(alphanum, 20)),
            ''.join(random.sample(alphanum, 4)),
            ''.join(random.sample(alphanum, 28)),
            ''.join(random.sample(alphanum, 40)),
        ]
        kuaishou_server_web_st = (
            f"{st_segments[0]}"
            f"_{st_segments[1]}"
            f"-{st_segments[2]}"
            f"-{st_segments[3]}"
            f"_{st_segments[4]}"
            f"-{st_segments[5]}"
            f"-{st_segments[6]}"
            f"-{st_segments[7]}"
        )
        kuaishou_server_web_ph = ''.join(random.sample(alphanum, 36))
        print(f"kuaishou_server_web_st:{kuaishou_server_web_st}")
        print(f"kuaishou_server_web_ph:{kuaishou_server_web_ph}")


if __name__ == "__main__":
    # Insert.insert_video_from_feishu_to_mysql("insert-prod", "kuaishou", "prod", "local")
    # Insert.get_sheet()
    # Insert.random_out_uid()
    Insert.random_cookies()
    pass