# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/4/13
import ast
import json
import os
import sys
import time
from datetime import date, timedelta
sys.path.append(os.getcwd())
from common.common import Common
from common.feishu import Feishu
from common.scheduling_db import MysqlHelper


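# This backfill script syncs the "benshanzhufu" (本山祝福) source between Feishu
# sheets and MySQL: insert_config() writes the filter-word config row, and
# insert_video_from_feishu_to_mysql() copies video rows into crawler_video.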
class Insert:
    @classmethod
    def get_config(cls, log_type, crawler, text, env):
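        """Return the "title" or "filter" word list (comma-split) stored in the
        crawler_config row for source "benshanzhufu", depending on `text`."""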
        select_sql = f"""select * from crawler_config where source="benshanzhufu" """
        contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
        title_list = []
        filter_list = []
        for content in contents:
            config = content['config']
            # The config column holds a str()-ed dict; parse it with literal_eval
            # rather than eval() so arbitrary expressions are never executed.
            config_dict = ast.literal_eval(config)
            for k, v in config_dict.items():
                if k == "title":
                    title_list.extend(v.split(","))
                if k == "filter":
                    filter_list.extend(v.split(","))
        if text == "title":
            return title_list
        elif text == "filter":
            return filter_list

    @classmethod
    def before_day(cls):
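        """Debug helper: print the publish-time cutoff of 30 days ago as both a
        formatted string and a Unix timestamp."""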
        publish_time_str_rule = (date.today() + timedelta(days=-30)).strftime("%Y-%m-%d %H:%M:%S")
        publish_time_stamp_rule = int(time.mktime(time.strptime(publish_time_str_rule, "%Y-%m-%d %H:%M:%S")))
        print(publish_time_str_rule)
        print(publish_time_stamp_rule)

    @classmethod
    def insert_config(cls, log_type, crawler, env):
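        """Collect filter words from the Feishu sheet "DjXfqG", join them into a
        comma-separated string and insert them as the crawler_config row for
        source "benshanzhufu"."""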
        filter_sheet = Feishu.get_values_batch(log_type, crawler, "DjXfqG")
        # title_sheet = Feishu.get_values_batch(log_type, crawler, "bHSW1p")
        filter_list = []
        # title_list = []
        for row in filter_sheet:
            for cell in row:
                if cell is not None:
                    filter_list.append(cell)
        # for row in title_sheet:
        #     for cell in row:
        #         if cell is not None:
        #             title_list.append(cell)
        # str_title = ','.join(title_list)
        str_filter = ','.join(filter_list)
        config_dict = {
            # "title": str_title,
            "filter": str_filter
        }
        str_config_dict = str(config_dict)
        # print(f"config_dict:{config_dict}")
        # print(f"str_config_dict:{str_config_dict}")
        insert_sql = f""" insert into crawler_config(title, source, config) values("本山祝福小程序", "benshanzhufu", "{str_config_dict}") """
        MysqlHelper.update_values(log_type, crawler, insert_sql, env)

    @classmethod
    def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env):
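        """Copy video rows from the Feishu sheet into the crawler_video MySQL
        table, skipping rows that already exist (matched on platform and
        out_video_id)."""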
        benshanzhufu_sheetid = ['440018']
        for sheetid in benshanzhufu_sheetid:
            xiaoniangao_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
            # Data rows start at index 1 (row 0 is presumably the header).
            for i in range(1, len(xiaoniangao_sheet)):
                # for i in range(1, 3):
                if xiaoniangao_sheet[i][5] is None or xiaoniangao_sheet[i][9] is None:
                    continue
                video_id = xiaoniangao_sheet[i][8].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace(
                    "/info", "")
                if video_id == "None":
                    continue
                video_id = int(video_id)
                out_user_id = str(xiaoniangao_sheet[i][17])
                platform = "本山祝福"
                strategy = "推荐榜爬虫策略"
                out_video_id = str(xiaoniangao_sheet[i][6])
                video_title = str(xiaoniangao_sheet[i][7])
                cover_url = str(xiaoniangao_sheet[i][19])
                video_url = str(xiaoniangao_sheet[i][20])
                duration = int(xiaoniangao_sheet[i][13])
                publish_time = str(xiaoniangao_sheet[i][15]).replace("/", "-")
                play_cnt = int(xiaoniangao_sheet[i][9])
                like_cnt = int(xiaoniangao_sheet[i][11])
                share_cnt = int(xiaoniangao_sheet[i][12])
                # collection_cnt = 0
                comment_cnt = int(xiaoniangao_sheet[i][10])
                user_id = str(xiaoniangao_sheet[i][17])
                crawler_rule = json.dumps({})
                width = int(xiaoniangao_sheet[i][14].split("*")[0])
                height = int(xiaoniangao_sheet[i][14].split("*")[1])
  103. # print(f"video_id:{video_id}, type:{type(video_id)}")
  104. # print(f"user_id:{user_id}, type:{type(user_id)}")
  105. # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}")
  106. # print(f"platform:{platform}, type:{type(platform)}")
  107. # print(f"strategy:{strategy}, type:{type(strategy)}")
  108. # print(f"out_video_id:{out_video_id}, type:{type(out_video_id)}")
  109. # print(f"video_title:{video_title}, type:{type(video_title)}")
  110. # print(f"cover_url:{cover_url}, type:{type(cover_url)}")
  111. # print(f"video_url:{video_url}, type:{type(video_url)}")
  112. # print(f"duration:{duration}, type:{type(duration)}")
  113. # print(f"publish_time:{publish_time}, type:{type(publish_time)}")
  114. # print(f"play_cnt:{play_cnt}, type:{type(play_cnt)}")
  115. # print(f"like_cnt:{like_cnt}, type:{type(like_cnt)}")
  116. # print(f"share_cnt:{share_cnt}, type:{type(share_cnt)}")
  117. # print(f"comment_cnt:{comment_cnt}, type:{type(comment_cnt)}")
  118. # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}")
  119. # print(f"width:{width}, type:{type(width)}")
  120. # print(f"height:{height}, type:{type(height)}\n")
                select_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}" """
                Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
                repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env)
                Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
                if repeat_video is not None and len(repeat_video) != 0:
                    Common.logger(log_type, crawler).info(f"{video_title} already exists in the database\n")
                else:
                    # Save the video info to the database.
                    insert_sql = f""" insert into crawler_video(video_id,
                                                                out_user_id,
                                                                platform,
                                                                strategy,
                                                                out_video_id,
                                                                video_title,
                                                                cover_url,
                                                                video_url,
                                                                duration,
                                                                publish_time,
                                                                play_cnt,
                                                                like_cnt,
                                                                share_cnt,
                                                                comment_cnt,
                                                                crawler_rule,
                                                                width,
                                                                height)
                                                                values({video_id},
                                                                "{out_user_id}",
                                                                "{platform}",
                                                                "{strategy}",
                                                                "{out_video_id}",
                                                                "{video_title}",
                                                                "{cover_url}",
                                                                "{video_url}",
                                                                {duration},
                                                                "{publish_time}",
                                                                {play_cnt},
                                                                {like_cnt},
                                                                {share_cnt},
                                                                {comment_cnt},
                                                                '{crawler_rule}',
                                                                {width},
                                                                {height}) """
                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                    MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
                    Common.logger(log_type, crawler).info('Video info inserted into the database!\n')


if __name__ == "__main__":
    # Insert.insert_config("insert", "benshanzhufu", "dev")
    # print(Insert.get_config("insert", "benshanzhufu", "filter", "dev"))
    Insert.insert_video_from_feishu_to_mysql("insert-prod", "benshanzhufu", "prod")
    pass