insert_videos.py

# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/2/27
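"""Sync video metadata from Feishu sheets into the MySQL crawler_video table.

Also carries two small manual helpers: get_sheet (print one sheet) and
random_out_uid (generate random test ids).
"""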
import json
import os
import random
import string
import sys
import time

sys.path.append(os.getcwd())
from common.common import Common
from common.db import MysqlHelper
from common.feishu import Feishu


class Insert:
    @classmethod
    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env, machine):
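        """Sync Kuaishou video rows from the Feishu sheets into MySQL.

        For each row: skip empties, derive video_id from the CMS link (or a
        timestamp when the link is missing), then insert the row into
        crawler_video, or update it when the out_video_id already exists.
        """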
        kuaishou_sheetid_list = ["fYdA8F", "3cd128", "31kOdu"]
        for sheetid in kuaishou_sheetid_list:
            kuaishou_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
            # Common.logger(log_type, crawler).info(f"kuaishou_sheet:{kuaishou_sheet}")
            for i in range(1, len(kuaishou_sheet)):
                # for i in range(1, 3):
                if kuaishou_sheet[i][5] is None:
                    continue
                if kuaishou_sheet[i][9] is None:
                    # No CMS link yet: fall back to a timestamp-based id
                    video_id = int(time.time())
                else:
                    # Strip the CMS post-detail URL down to the numeric id
                    video_id = kuaishou_sheet[i][9].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace("/info", "")
                    if video_id == "None":
                        continue
                    video_id = int(video_id)
                user_id = 0
                out_user_id = str(kuaishou_sheet[i][18])
                platform = "快手"  # Kuaishou
                strategy = "定向爬虫策略"  # "targeted crawler strategy"
                out_video_id = str(kuaishou_sheet[i][7])
                video_title = str(kuaishou_sheet[i][8])
                cover_url = str(kuaishou_sheet[i][20])
                video_url = str(kuaishou_sheet[i][21])
                duration = int(kuaishou_sheet[i][14])
                publish_time = str(kuaishou_sheet[i][16]).replace("/", "-")
                play_cnt = int(kuaishou_sheet[i][10])
                like_cnt = int(kuaishou_sheet[i][12])
                share_cnt = int(kuaishou_sheet[i][13])
                # collection_cnt = 0
                comment_cnt = int(kuaishou_sheet[i][11])
                crawler_rule = json.dumps({"play_cnt": 5000, "comment_cnt": 0, "like_cnt": 5000, "share_cnt": 1000, "duration": 40, "publish_time": 7, "video_width": 0, "video_height": 0})
                # Resolution is stored in the sheet as "width*height"
                width = int(kuaishou_sheet[i][15].split("*")[0])
                height = int(kuaishou_sheet[i][15].split("*")[1])
                # debug: print each parsed field and its type
                select_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}" """
                # Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env, machine)
                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
                repeat_video_id_sql = f""" select * from crawler_video where out_video_id="{out_video_id}" """
                repeat_video_id = MysqlHelper.get_values(log_type, crawler, repeat_video_id_sql, env, machine)
                Common.logger(log_type, crawler).info(f"repeat_video_id:{repeat_video_id}")
                if repeat_video is not None and len(repeat_video) != 0:
                    Common.logger(log_type, crawler).info(f"{video_title} already exists in the database\n")
                elif repeat_video_id is not None and len(repeat_video_id) != 0:
                    Common.logger(log_type, crawler).info("Start updating video info\n")
                    update_sql = f""" UPDATE crawler_video SET
                                    user_id={user_id},
                                    out_user_id="{out_user_id}",
                                    platform="{platform}",
                                    strategy="{strategy}",
                                    out_video_id="{out_video_id}",
                                    video_title="{video_title}",
                                    cover_url="{cover_url}",
                                    video_url="{video_url}",
                                    duration={duration},
                                    publish_time="{publish_time}",
                                    play_cnt={play_cnt},
                                    like_cnt={like_cnt},
                                    share_cnt={share_cnt},
                                    comment_cnt={comment_cnt},
                                    crawler_rule='{crawler_rule}',
                                    width={width},
                                    height={height}
                                    WHERE video_id={video_id}
                    """
                    Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                    MysqlHelper.update_values(log_type, crawler, update_sql, env, machine)
                    Common.logger(log_type, crawler).info('Video info updated successfully!\n')
                else:
                    # Save the video info to the database
                    insert_sql = f""" insert into crawler_video(video_id,
                                    user_id,
                                    out_user_id,
                                    platform,
                                    strategy,
                                    out_video_id,
                                    video_title,
                                    cover_url,
                                    video_url,
                                    duration,
                                    publish_time,
                                    play_cnt,
                                    like_cnt,
                                    share_cnt,
                                    comment_cnt,
                                    crawler_rule,
                                    width,
                                    height)
                                    values({video_id},
                                    {user_id},
                                    "{out_user_id}",
                                    "{platform}",
                                    "{strategy}",
                                    "{out_video_id}",
                                    "{video_title}",
                                    "{cover_url}",
                                    "{video_url}",
                                    {duration},
                                    "{publish_time}",
                                    {play_cnt},
                                    {like_cnt},
                                    {share_cnt},
                                    {comment_cnt},
                                    '{crawler_rule}',
                                    {width},
                                    {height}) """
                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
                    Common.logger(log_type, crawler).info('Video info inserted into the database successfully!\n')

    @classmethod
    def get_sheet(cls):
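        """Fetch one Feishu sheet and print it, for manual inspection."""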
        sheet = Feishu.get_values_batch("insert", "kuaishou", "fYdA8F")
        print(sheet)

    @classmethod
    def random_out_uid(cls):
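        """Scratch helper: generate random user ids / uids for testing."""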
        # Sample values; `did` is unused below, and userId is regenerated anyway
        did = "web_e2901e1c5a13c60af81ba88bc7a3ee24"
        userId = "3352428474"
        src_digits = string.digits  # digits
        src_uppercase = string.ascii_uppercase  # uppercase letters
        src_lowercase = string.ascii_lowercase  # lowercase letters
        # 10-digit random number (each digit drawn from 1-9)
        userId = ''.join(str(random.choice(range(1, 10))) for _ in range(10))
        print(type(userId))
        print(userId)
        # 5 random characters drawn without repeats from letters and digits
        a_str = ''.join(random.sample(string.ascii_letters + string.digits, 5))
        # Sampling all 10 digits without replacement yields a permutation of 0-9
        out_uid = ''.join(random.sample(string.digits, 10))
        print(type(out_uid))
        print(out_uid)
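

# A minimal sketch, not part of the original flow: the f-string SQL above breaks
# when a title contains quote characters. With a DB-API driver such as pymysql,
# %s placeholders let the driver handle escaping. The function name and reduced
# column list here are illustrative assumptions, not the project's API.
def insert_video_row_sketch(conn, row):
    # `conn` is an open pymysql connection; `row` is a dict of column values.
    sql = ("insert into crawler_video(video_id, user_id, out_video_id, platform, video_title) "
           "values (%s, %s, %s, %s, %s)")
    with conn.cursor() as cursor:
        cursor.execute(sql, (row["video_id"], row["user_id"], row["out_video_id"],
                             row["platform"], row["video_title"]))
    conn.commit()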


if __name__ == "__main__":
    Insert.insert_video_from_feishu_to_mysql("insert-prod", "kuaishou", "prod", "local")
    # Insert.get_sheet()
    # Insert.random_out_uid()