123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216 |
- # -*- coding: utf-8 -*-
- # @Author: wangkun
- # @Time: 2023/2/27
- import json
- import os
- import random
- import string
- import sys
- import time
- sys.path.append(os.getcwd())
- from common.common import Common
- from common.db import MysqlHelper
- from common.feishu import Feishu
- class Insert:
- @classmethod
- def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env, machine):
- kuaishou_sheetid_list = ["fYdA8F", "3cd128", "31kOdu"]
- for sheetid in kuaishou_sheetid_list:
- kuaishou_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
- # Common.logger(log_type, crawler).info(f"kuaishou_sheet:{kuaishou_sheet}")
- for i in range(1, len(kuaishou_sheet)):
- # for i in range(1, 3):
- if kuaishou_sheet[i][5] is None:
- continue
- if kuaishou_sheet[i][9] is None:
- video_id = int(time.time())
- else:
- video_id = kuaishou_sheet[i][9].replace("https://admin.piaoquantv.com/cms/post-detail/",
- "").replace("/info", "")
- if video_id == "None":
- continue
- video_id = int(video_id)
- user_id = 0
- out_user_id = str(kuaishou_sheet[i][18])
- platform = "快手"
- strategy = "定向爬虫策略"
- out_video_id = str(kuaishou_sheet[i][7])
- video_title = str(kuaishou_sheet[i][8])
- cover_url = str(kuaishou_sheet[i][20])
- video_url = str(kuaishou_sheet[i][21])
- duration = int(kuaishou_sheet[i][14])
- publish_time = str(kuaishou_sheet[i][16]).replace("/", "-")
- play_cnt = int(kuaishou_sheet[i][10])
- like_cnt = int(kuaishou_sheet[i][12])
- share_cnt = int(kuaishou_sheet[i][13])
- # collection_cnt = 0
- comment_cnt = int(kuaishou_sheet[i][11])
- crawler_rule = json.dumps(
- {"play_cnt": 5000, "comment_cnt": 0, "like_cnt": 5000, "share_cnt": 1000, "duration": 40,
- "publish_time": 7, "video_width": 0, "video_height": 0})
- width = int(kuaishou_sheet[i][15].split("*")[0])
- height = int(kuaishou_sheet[i][15].split("*")[1])
- # print(f"video_id:{video_id}, type:{type(video_id)}")
- # print(f"user_id:{user_id}, type:{type(user_id)}")
- # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}")
- # print(f"platform:{platform}, type:{type(platform)}")
- # print(f"strategy:{strategy}, type:{type(strategy)}")
- # print(f"out_video_id:{out_video_id}, type:{type(out_video_id)}")
- # print(f"video_title:{video_title}, type:{type(video_title)}")
- # print(f"cover_url:{cover_url}, type:{type(cover_url)}")
- # print(f"video_url:{video_url}, type:{type(video_url)}")
- # print(f"duration:{duration}, type:{type(duration)}")
- # print(f"publish_time:{publish_time}, type:{type(publish_time)}")
- # print(f"play_cnt:{play_cnt}, type:{type(play_cnt)}")
- # print(f"like_cnt:{like_cnt}, type:{type(like_cnt)}")
- # print(f"share_cnt:{share_cnt}, type:{type(share_cnt)}")
- # print(f"collection_cnt:{collection_cnt}, type:{type(collection_cnt)}")
- # print(f"comment_cnt:{comment_cnt}, type:{type(comment_cnt)}")
- # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}")
- # print(f"width:{width}, type:{type(width)}")
- # print(f"height:{height}, type:{type(height)}\n")
- select_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}" """
- # Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
- repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env, machine)
- Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
- repeat_video_id_sql = f""" select * from crawler_video where out_video_id="{out_video_id}" """
- repeat_video_id = MysqlHelper.get_values(log_type, crawler, repeat_video_id_sql, env, machine)
- Common.logger(log_type, crawler).info(f"repeat_video_id:{repeat_video_id}")
- if repeat_video is not None and len(repeat_video) != 0:
- Common.logger(log_type, crawler).info(f"{video_title} 已存在数据库中\n")
- elif repeat_video_id is not None and len(repeat_video_id) != 0:
- Common.logger(log_type, crawler).info(f"开始更新视频信息\n")
- update_sql = f""" UPDATE crawler_video SET
- user_id={user_id},
- out_user_id="{out_user_id}",
- platform="{platform}",
- strategy="{strategy}",
- out_video_id="{out_video_id}",
- video_title="{video_title}",
- cover_url="{cover_url}",
- video_url="{video_url}",
- duration={duration},
- publish_time="{publish_time}",
- play_cnt={play_cnt},
- like_cnt={like_cnt},
- share_cnt={share_cnt},
- comment_cnt={comment_cnt},
- crawler_rule='{crawler_rule}',
- width={width},
- height={height}
- WHERE video_id={video_id}
- """
- Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
- MysqlHelper.update_values(log_type, crawler, update_sql, env, machine)
- Common.logger(log_type, crawler).info('视频信息更新成功!\n')
- else:
- # 视频信息保存数据库
- insert_sql = f""" insert into crawler_video(video_id,
- user_id,
- out_user_id,
- platform,
- strategy,
- out_video_id,
- video_title,
- cover_url,
- video_url,
- duration,
- publish_time,
- play_cnt,
- like_cnt,
- share_cnt,
- comment_cnt,
- crawler_rule,
- width,
- height)
- values({video_id},
- {user_id},
- "{out_user_id}",
- "{platform}",
- "{strategy}",
- "{out_video_id}",
- "{video_title}",
- "{cover_url}",
- "{video_url}",
- {duration},
- "{publish_time}",
- {play_cnt},
- {like_cnt},
- {share_cnt},
- {comment_cnt},
- '{crawler_rule}',
- {width},
- {height}) """
- Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
- MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
- Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
- @classmethod
- def get_sheet(cls):
- sheet = Feishu.get_values_batch("insert", "kuaishou", "fYdA8F")
- print(sheet)
- @classmethod
- def random_out_uid(cls):
- did = "web_e2901e1c5a13c60af81ba88bc7a3ee24"
- userId = "1921947321"
- did = "web_e2901e1c5a13c60af81ba88bc7a3ee24"
- userId = "3352428474"
- src_digits = string.digits # string_数字
- src_uppercase = string.ascii_uppercase # string_大写字母
- src_lowercase = string.ascii_lowercase # string_小写字母
- # 10位随机数的方法
- userId = ''.join(str(random.choice(range(1, 10))) for _ in range(10))
- print(type(userId))
- print(userId)
- # 生成5位随机字符,包括大小写字母和数字
- a_str = ''.join(random.sample(string.ascii_letters + string.digits, 5))
- out_uid = ''.join(random.sample(string.digits, 10))
- print(type(out_uid))
- print(out_uid)
- @classmethod
- def random_cookies(cls):
- kuaishou_server_web_st="ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABaRXtfRHlzKlQVj0Nm" \
- "_M1G2wrIN1p6g3UTwfqfez6rkLVj6mPNt3RBAsLkyemMpvTLerPw0h41Q0lowqcImvIv5dlSGDEpQoj" \
- "-VTAmOR2Suzm8vCRakG7XziAWyI0PXJKhvdXms" \
- "-9Giy" \
- "_4TnoniB49Oo3m7qXjXVBCzybcWS5BO90OLkhD30GYmGEnBBvkBI2oErJy3mNbafQdBQ6SxSUHhoS" \
- "-1Rj5" \
- "-IBBNoxoIePYcxZFs4oIiCvaT7sRn" \
- "-zrF7X2ClPhfNh6lgClmH8MUjXszUfY_TPLCgFMAE"
- kuaishou_server_web_ph="1b62b98fc28bc23a42cd85240e1fd6025983"
- kuaishou_server_web_st_1 = ''.join(random.sample(string.ascii_letters + string.digits, 53))
- kuaishou_server_web_st_2 = ''.join(random.sample(string.ascii_letters + string.digits, 58))+''.join(random.sample(string.ascii_letters + string.digits, 20))
- kuaishou_server_web_st_3 = ''.join(random.sample(string.ascii_letters + string.digits, 37))
- kuaishou_server_web_st_4 = ''.join(random.sample(string.ascii_letters + string.digits, 4))
- kuaishou_server_web_st_5 = ''.join(random.sample(string.ascii_letters + string.digits, 56))+''.join(random.sample(string.ascii_letters + string.digits, 20))
- kuaishou_server_web_st_6 = ''.join(random.sample(string.ascii_letters + string.digits, 4))
- kuaishou_server_web_st_7 = ''.join(random.sample(string.ascii_letters + string.digits, 28))
- kuaishou_server_web_st_8 = ''.join(random.sample(string.ascii_letters + string.digits, 40))
- kuaishou_server_web_st = f"{kuaishou_server_web_st_1}" \
- f"_{kuaishou_server_web_st_2}" \
- f"-{kuaishou_server_web_st_3}" \
- f"-{kuaishou_server_web_st_4}" \
- f"_{kuaishou_server_web_st_5}" \
- f"-{kuaishou_server_web_st_6}" \
- f"-{kuaishou_server_web_st_7}" \
- f"-{kuaishou_server_web_st_8}"
- kuaishou_server_web_ph = ''.join(random.sample(string.ascii_letters + string.digits, 36))
- print(f"kuaishou_server_web_st:{kuaishou_server_web_st}")
- print(f"kuaishou_server_web_ph:{kuaishou_server_web_ph}")
- if __name__ == "__main__":
- # Insert.insert_video_from_feishu_to_mysql("insert-prod", "kuaishou", "prod", "local")
- # Insert.get_sheet()
- # Insert.random_out_uid()
- Insert.random_cookies()
- pass
|