insert_videos.py

# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/2/27
import json
import os
import random
import string
import sys
import time
sys.path.append(os.getcwd())
from common.common import Common
from common.db import MysqlHelper
from common.feishu import Feishu


class Insert:
    @classmethod
    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env, machine):
        kuaishou_sheetid_list = ["fYdA8F", "3cd128", "31kOdu"]
        for sheetid in kuaishou_sheetid_list:
            kuaishou_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
            # Common.logger(log_type, crawler).info(f"kuaishou_sheet:{kuaishou_sheet}")
            for i in range(1, len(kuaishou_sheet)):
                # for i in range(1, 3):
                if kuaishou_sheet[i][5] is None:
                    continue
                if kuaishou_sheet[i][9] is None:
                    video_id = int(time.time())
                else:
                    video_id = kuaishou_sheet[i][9].replace("https://admin.piaoquantv.com/cms/post-detail/",
                                                            "").replace("/info", "")
                    if video_id == "None":
                        continue
                    video_id = int(video_id)
                user_id = 0
                out_user_id = str(kuaishou_sheet[i][18])
                platform = "快手"  # Kuaishou; value is stored and matched verbatim, so the Chinese label is kept
                strategy = "定向爬虫策略"  # "targeted crawler strategy"; stored verbatim
                out_video_id = str(kuaishou_sheet[i][7])
                video_title = str(kuaishou_sheet[i][8])
                cover_url = str(kuaishou_sheet[i][20])
                video_url = str(kuaishou_sheet[i][21])
                duration = int(kuaishou_sheet[i][14])
                publish_time = str(kuaishou_sheet[i][16]).replace("/", "-")
                play_cnt = int(kuaishou_sheet[i][10])
                like_cnt = int(kuaishou_sheet[i][12])
                share_cnt = int(kuaishou_sheet[i][13])
                # collection_cnt = 0
                comment_cnt = int(kuaishou_sheet[i][11])
                crawler_rule = json.dumps(
                    {"play_cnt": 5000, "comment_cnt": 0, "like_cnt": 5000, "share_cnt": 1000, "duration": 40,
                     "publish_time": 7, "video_width": 0, "video_height": 0})
                width = int(kuaishou_sheet[i][15].split("*")[0])
                height = int(kuaishou_sheet[i][15].split("*")[1])
                # print(f"video_id:{video_id}, type:{type(video_id)}")
                # print(f"user_id:{user_id}, type:{type(user_id)}")
                # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}")
                # print(f"platform:{platform}, type:{type(platform)}")
                # print(f"strategy:{strategy}, type:{type(strategy)}")
                # print(f"out_video_id:{out_video_id}, type:{type(out_video_id)}")
                # print(f"video_title:{video_title}, type:{type(video_title)}")
                # print(f"cover_url:{cover_url}, type:{type(cover_url)}")
                # print(f"video_url:{video_url}, type:{type(video_url)}")
                # print(f"duration:{duration}, type:{type(duration)}")
                # print(f"publish_time:{publish_time}, type:{type(publish_time)}")
                # print(f"play_cnt:{play_cnt}, type:{type(play_cnt)}")
                # print(f"like_cnt:{like_cnt}, type:{type(like_cnt)}")
                # print(f"share_cnt:{share_cnt}, type:{type(share_cnt)}")
                # print(f"collection_cnt:{collection_cnt}, type:{type(collection_cnt)}")
                # print(f"comment_cnt:{comment_cnt}, type:{type(comment_cnt)}")
                # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}")
                # print(f"width:{width}, type:{type(width)}")
                # print(f"height:{height}, type:{type(height)}\n")
                select_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}" """
                # Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env, machine)
                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
                repeat_video_id_sql = f""" select * from crawler_video where out_video_id="{out_video_id}" """
                repeat_video_id = MysqlHelper.get_values(log_type, crawler, repeat_video_id_sql, env, machine)
                Common.logger(log_type, crawler).info(f"repeat_video_id:{repeat_video_id}")
                if repeat_video is not None and len(repeat_video) != 0:
                    # exact platform + out_video_id match: the row is already in the database, skip it
                    Common.logger(log_type, crawler).info(f"{video_title} is already in the database\n")
                elif repeat_video_id is not None and len(repeat_video_id) != 0:
                    # out_video_id exists (possibly under another platform label): refresh the row
                    Common.logger(log_type, crawler).info("Updating existing video record\n")
                    update_sql = f""" UPDATE crawler_video SET
                                    user_id={user_id},
                                    out_user_id="{out_user_id}",
                                    platform="{platform}",
                                    strategy="{strategy}",
                                    out_video_id="{out_video_id}",
                                    video_title="{video_title}",
                                    cover_url="{cover_url}",
                                    video_url="{video_url}",
                                    duration={duration},
                                    publish_time="{publish_time}",
                                    play_cnt={play_cnt},
                                    like_cnt={like_cnt},
                                    share_cnt={share_cnt},
                                    comment_cnt={comment_cnt},
                                    crawler_rule='{crawler_rule}',
                                    width={width},
                                    height={height}
                                    WHERE video_id={video_id}
                                    """
                    Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                    MysqlHelper.update_values(log_type, crawler, update_sql, env, machine)
                    Common.logger(log_type, crawler).info('Video record updated!\n')
                else:
                    # brand-new video: insert a full row into the database
                    insert_sql = f""" insert into crawler_video(video_id,
                                    user_id,
                                    out_user_id,
                                    platform,
                                    strategy,
                                    out_video_id,
                                    video_title,
                                    cover_url,
                                    video_url,
                                    duration,
                                    publish_time,
                                    play_cnt,
                                    like_cnt,
                                    share_cnt,
                                    comment_cnt,
                                    crawler_rule,
                                    width,
                                    height)
                                    values({video_id},
                                    {user_id},
                                    "{out_user_id}",
                                    "{platform}",
                                    "{strategy}",
                                    "{out_video_id}",
                                    "{video_title}",
                                    "{cover_url}",
                                    "{video_url}",
                                    {duration},
                                    "{publish_time}",
                                    {play_cnt},
                                    {like_cnt},
                                    {share_cnt},
                                    {comment_cnt},
                                    '{crawler_rule}',
                                    {width},
                                    {height}) """
                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
                    Common.logger(log_type, crawler).info('Video record inserted into the database!\n')
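
    # The f-string SQL above interpolates sheet values directly, which breaks on
    # titles containing quotes and is open to SQL injection. Below is a minimal
    # sketch (not part of the original script) of the same dedup lookup using
    # parameterized queries via pymysql; the method name, connection parameters,
    # and the pymysql dependency are all assumptions for illustration only --
    # whether MysqlHelper supports parameter binding is not shown in this file.
    @classmethod
    def select_repeat_video_sketch(cls, out_video_id, platform):
        import pymysql  # assumed dependency, used only by this sketch
        connection = pymysql.connect(host="127.0.0.1", user="user",
                                     password="password", database="crawler")
        try:
            with connection.cursor() as cursor:
                # %s placeholders let the driver escape the values safely
                cursor.execute(
                    "select * from crawler_video where platform=%s and out_video_id=%s",
                    (platform, out_video_id))
                return cursor.fetchall()
        finally:
            connection.close()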

    @classmethod
    def get_sheet(cls):
        sheet = Feishu.get_values_batch("insert", "kuaishou", "fYdA8F")
        print(sheet)

    @classmethod
    def random_out_uid(cls):
        # sample values; both are immediately overwritten by the random ones below
        did = "web_e2901e1c5a13c60af81ba88bc7a3ee24"
        userId = "1921947321"
        did = "web_e2901e1c5a13c60af81ba88bc7a3ee24"
        userId = "3352428474"
        src_digits = string.digits  # digits
        src_uppercase = string.ascii_uppercase  # uppercase letters
        src_lowercase = string.ascii_lowercase  # lowercase letters
        # build a 10-character random number (digits 1-9 only, since range(1, 10) excludes 0)
        userId = ''.join(str(random.choice(range(1, 10))) for _ in range(10))
        print(type(userId))
        print(userId)
        # generate 5 random characters from letters and digits
        a_str = ''.join(random.sample(string.ascii_letters + string.digits, 5))
        out_uid = ''.join(random.sample(string.digits, 10))
        print(type(out_uid))
        print(out_uid)
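
    # Note: random.sample draws without replacement, so the out_uid above is
    # always a permutation of the ten digits 0-9 (every digit appears exactly
    # once). If independently drawn digits are wanted instead, a minimal sketch
    # (the method name is hypothetical, not part of the original script):
    @classmethod
    def random_out_uid_with_repeats_sketch(cls):
        # random.choices samples with replacement, so digits may repeat
        out_uid = ''.join(random.choices(string.digits, k=10))
        print(out_uid)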

    @classmethod
    def random_cookies(cls):
        # sample cookie values; both are overwritten by the random ones generated below
        kuaishou_server_web_st = "ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABaRXtfRHlzKlQVj0Nm" \
                                 "_M1G2wrIN1p6g3UTwfqfez6rkLVj6mPNt3RBAsLkyemMpvTLerPw0h41Q0lowqcImvIv5dlSGDEpQoj" \
                                 "-VTAmOR2Suzm8vCRakG7XziAWyI0PXJKhvdXms" \
                                 "-9Giy" \
                                 "_4TnoniB49Oo3m7qXjXVBCzybcWS5BO90OLkhD30GYmGEnBBvkBI2oErJy3mNbafQdBQ6SxSUHhoS" \
                                 "-1Rj5" \
                                 "-IBBNoxoIePYcxZFs4oIiCvaT7sRn" \
                                 "-zrF7X2ClPhfNh6lgClmH8MUjXszUfY_TPLCgFMAE"
        kuaishou_server_web_ph = "1b62b98fc28bc23a42cd85240e1fd6025983"
        # random segments joined with the same _ / - separators as the sample value above
        kuaishou_server_web_st_1 = ''.join(random.sample(string.ascii_letters + string.digits, 53))
        kuaishou_server_web_st_2 = ''.join(random.sample(string.ascii_letters + string.digits, 58)) + ''.join(random.sample(string.ascii_letters + string.digits, 20))
        kuaishou_server_web_st_3 = ''.join(random.sample(string.ascii_letters + string.digits, 37))
        kuaishou_server_web_st_4 = ''.join(random.sample(string.ascii_letters + string.digits, 4))
        kuaishou_server_web_st_5 = ''.join(random.sample(string.ascii_letters + string.digits, 56)) + ''.join(random.sample(string.ascii_letters + string.digits, 20))
        kuaishou_server_web_st_6 = ''.join(random.sample(string.ascii_letters + string.digits, 4))
        kuaishou_server_web_st_7 = ''.join(random.sample(string.ascii_letters + string.digits, 28))
        kuaishou_server_web_st_8 = ''.join(random.sample(string.ascii_letters + string.digits, 40))
        kuaishou_server_web_st = f"{kuaishou_server_web_st_1}" \
                                 f"_{kuaishou_server_web_st_2}" \
                                 f"-{kuaishou_server_web_st_3}" \
                                 f"-{kuaishou_server_web_st_4}" \
                                 f"_{kuaishou_server_web_st_5}" \
                                 f"-{kuaishou_server_web_st_6}" \
                                 f"-{kuaishou_server_web_st_7}" \
                                 f"-{kuaishou_server_web_st_8}"
        kuaishou_server_web_ph = ''.join(random.sample(string.ascii_letters + string.digits, 36))
        print(f"kuaishou_server_web_st:{kuaishou_server_web_st}")
        print(f"kuaishou_server_web_ph:{kuaishou_server_web_ph}")
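
    # A minimal sketch (not part of the original script) of how generated values
    # like these might be sent as request cookies. The requests dependency, the
    # cookie names (which simply mirror the variable names above), and the URL
    # are all illustrative assumptions:
    @classmethod
    def request_with_random_cookies_sketch(cls):
        import requests  # assumed dependency, used only by this sketch
        cookies = {
            # hypothetical cookie names, mirroring the variables above
            "kuaishou_server_web_st": ''.join(random.sample(string.ascii_letters + string.digits, 53)),
            "kuaishou_server_web_ph": ''.join(random.sample(string.ascii_letters + string.digits, 36)),
        }
        response = requests.get("https://example.com", cookies=cookies, timeout=10)
        print(response.status_code)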


if __name__ == "__main__":
    # Insert.insert_video_from_feishu_to_mysql("insert-prod", "kuaishou", "prod", "local")
    # Insert.get_sheet()
    # Insert.random_out_uid()
    Insert.random_cookies()