# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/2/27
import json
import os
import sys

sys.path.append(os.getcwd())
from common.common import Common
from common.db import MysqlHelper
from common.feishu import Feishu


class Insert:
    @classmethod
    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env, machine):
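        """Sync Kuaishou video rows from three Feishu sheets into the crawler_video MySQL table.

        Each data row is parsed into the crawler_video fields, then either skipped
        (same platform + out_video_id already stored), used to update an existing
        record (out_video_id known under another platform), or inserted as new.
        """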
        kuaishou_sheetid_list = ["fYdA8F", "3cd128", "31kOdu"]
        for sheetid in kuaishou_sheetid_list:
            kuaishou_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
            # Common.logger(log_type, crawler).info(f"kuaishou_sheet:{kuaishou_sheet}")
            for i in range(1, len(kuaishou_sheet)):  # row 0 is the header
                # for i in range(1, 3):
                if kuaishou_sheet[i][5] is None:  # skip incomplete rows
                    continue
                if kuaishou_sheet[i][9] is None:
                    video_id = 0
                else:
                    # Column 9 holds the admin post-detail URL; strip it down to the numeric id
                    video_id = kuaishou_sheet[i][9].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace("/info", "")
                    if video_id == "None":
                        continue
                    video_id = int(video_id)
                user_id = 0
                out_user_id = str(kuaishou_sheet[i][18])
                platform = "快手"  # Kuaishou
                strategy = "定向爬虫策略"  # "targeted crawl strategy"
                out_video_id = str(kuaishou_sheet[i][7])
                video_title = str(kuaishou_sheet[i][8])
                cover_url = str(kuaishou_sheet[i][20])
                video_url = str(kuaishou_sheet[i][21])
                duration = int(kuaishou_sheet[i][14])
                publish_time = str(kuaishou_sheet[i][16].replace("/", "-"))
                play_cnt = int(kuaishou_sheet[i][10])
                like_cnt = int(kuaishou_sheet[i][12])
                share_cnt = int(kuaishou_sheet[i][13])
                # collection_cnt = 0
                comment_cnt = int(kuaishou_sheet[i][11])
                crawler_rule = json.dumps({"play_cnt": 5000, "comment_cnt": 0, "like_cnt": 5000, "share_cnt": 1000, "duration": 40, "publish_time": 7, "video_width": 0, "video_height": 0})
                # Column 15 stores the resolution as "width*height"
                width = int(kuaishou_sheet[i][15].split("*")[0])
                height = int(kuaishou_sheet[i][15].split("*")[1])
                # print(f"video_id:{video_id}, type:{type(video_id)}")
                # print(f"user_id:{user_id}, type:{type(user_id)}")
                # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}")
                # print(f"platform:{platform}, type:{type(platform)}")
                # print(f"strategy:{strategy}, type:{type(strategy)}")
                # print(f"out_video_id:{out_video_id}, type:{type(out_video_id)}")
                # print(f"video_title:{video_title}, type:{type(video_title)}")
                # print(f"cover_url:{cover_url}, type:{type(cover_url)}")
                # print(f"video_url:{video_url}, type:{type(video_url)}")
                # print(f"duration:{duration}, type:{type(duration)}")
                # print(f"publish_time:{publish_time}, type:{type(publish_time)}")
                # print(f"play_cnt:{play_cnt}, type:{type(play_cnt)}")
                # print(f"like_cnt:{like_cnt}, type:{type(like_cnt)}")
                # print(f"share_cnt:{share_cnt}, type:{type(share_cnt)}")
                # print(f"collection_cnt:{collection_cnt}, type:{type(collection_cnt)}")
                # print(f"comment_cnt:{comment_cnt}, type:{type(comment_cnt)}")
                # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}")
                # print(f"width:{width}, type:{type(width)}")
                # print(f"height:{height}, type:{type(height)}\n")
                select_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}" """
                # Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env, machine)
                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
                repeat_video_id_sql = f""" select * from crawler_video where out_video_id="{out_video_id}" """
                repeat_video_id = MysqlHelper.get_values(log_type, crawler, repeat_video_id_sql, env, machine)
                Common.logger(log_type, crawler).info(f"repeat_video_id:{repeat_video_id}")
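                # NOTE: all SQL in this method is assembled with f-strings, so a title
                # or URL containing a double quote breaks the statement (and is an
                # injection risk). If MysqlHelper wraps a DB-API driver such as pymysql
                # (an assumption, not verified here), a parameterized query would be
                # safer, e.g.:
                #   cursor.execute("select * from crawler_video where out_video_id=%s", (out_video_id,))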
                if repeat_video is not None and len(repeat_video) != 0:
                    Common.logger(log_type, crawler).info(f"{video_title} is already in the database\n")
                elif repeat_video_id is not None and len(repeat_video_id) != 0:
                    Common.logger(log_type, crawler).info("Start updating video info\n")
                    update_sql = f""" UPDATE crawler_video SET
                                    user_id={user_id},
                                    out_user_id="{out_user_id}",
                                    platform="{platform}",
                                    strategy="{strategy}",
                                    out_video_id="{out_video_id}",
                                    video_title="{video_title}",
                                    cover_url="{cover_url}",
                                    video_url="{video_url}",
                                    duration={duration},
                                    publish_time="{publish_time}",
                                    play_cnt={play_cnt},
                                    like_cnt={like_cnt},
                                    share_cnt={share_cnt},
                                    comment_cnt={comment_cnt},
                                    crawler_rule='{crawler_rule}',
                                    width={width},
                                    height={height}
                                    WHERE video_id={video_id}
                    """
                    Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                    MysqlHelper.update_values(log_type, crawler, update_sql, env, machine)
                    Common.logger(log_type, crawler).info('Video info updated successfully!\n')
                else:
                    # Save the video info to the database as a new record
                    insert_sql = f""" insert into crawler_video(video_id,
                                                                user_id,
                                                                out_user_id,
                                                                platform,
                                                                strategy,
                                                                out_video_id,
                                                                video_title,
                                                                cover_url,
                                                                video_url,
                                                                duration,
                                                                publish_time,
                                                                play_cnt,
                                                                like_cnt,
                                                                share_cnt,
                                                                comment_cnt,
                                                                crawler_rule,
                                                                width,
                                                                height)
                                                                values({video_id},
                                                                {user_id},
                                                                "{out_user_id}",
                                                                "{platform}",
                                                                "{strategy}",
                                                                "{out_video_id}",
                                                                "{video_title}",
                                                                "{cover_url}",
                                                                "{video_url}",
                                                                {duration},
                                                                "{publish_time}",
                                                                {play_cnt},
                                                                {like_cnt},
                                                                {share_cnt},
                                                                {comment_cnt},
                                                                '{crawler_rule}',
                                                                {width},
                                                                {height}) """
                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
                    Common.logger(log_type, crawler).info('Video info inserted into the database successfully!\n')

    @classmethod
    def get_sheet(cls):
        """Debug helper: fetch one Feishu sheet and print it."""
        sheet = Feishu.get_values_batch("insert", "kuaishou", "fYdA8F")
        print(sheet)


if __name__ == "__main__":
    Insert.insert_video_from_feishu_to_mysql("insert-prod", "kuaishou", "prod", "local")
    # Insert.get_sheet()
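# Usage note (assumed from the imports above): run from the repository root so
# that the common/ package resolves, e.g. `python3 insert_videos.py`. The env and
# machine arguments ("prod", "local") are forwarded to MysqlHelper, presumably
# to select the database connection.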