# insert.py
# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/4/17
import ast
import json
import os
import sys
import time
from datetime import date, timedelta
from hashlib import md5

sys.path.append(os.getcwd())
from common.common import Common
from common.feishu import Feishu
from common.scheduling_db import MysqlHelper
  14. class Insert:
  15. @classmethod
  16. def get_config(cls, log_type, crawler, text, env):
  17. select_sql = f"""select * from crawler_config where source="benshanzhufu" """
  18. contents = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
  19. title_list = []
  20. filter_list = []
  21. for content in contents:
  22. config = content['config']
  23. config_dict = eval(config)
  24. for k, v in config_dict.items():
  25. if k == "title":
  26. title_list_config = v.split(",")
  27. for title in title_list_config:
  28. title_list.append(title)
  29. if k == "filter":
  30. filter_list_config = v.split(",")
  31. for filter_word in filter_list_config:
  32. filter_list.append(filter_word)
  33. if text == "title":
  34. return title_list
  35. elif text == "filter":
  36. return filter_list
  37. @classmethod
  38. def before_day(cls):
  39. publish_time_str_rule = (date.today() + timedelta(days=-30)).strftime("%Y-%m-%d %H:%M:%S")
  40. publish_time_stamp_rule = int(time.mktime(time.strptime(publish_time_str_rule, "%Y-%m-%d %H:%M:%S")))
  41. print(publish_time_str_rule)
  42. print(publish_time_stamp_rule)
  43. @classmethod
  44. def insert_config(cls, log_type, crawler, env):
  45. filter_sheet = Feishu.get_values_batch(log_type, crawler, "DjXfqG")
  46. # title_sheet = Feishu.get_values_batch(log_type, crawler, "bHSW1p")
  47. filter_list = []
  48. # title_list = []
  49. for x in filter_sheet:
  50. for y in x:
  51. if y is None:
  52. pass
  53. else:
  54. filter_list.append(y)
  55. # for x in title_sheet:
  56. # for y in x:
  57. # if y is None:
  58. # pass
  59. # else:
  60. # title_list.append(y)
  61. # str_title = ','.join(title_list)
  62. str_filter = ','.join(filter_list)
  63. config_dict = {
  64. # "title": str_title,
  65. "filter": str_filter
  66. }
  67. str_config_dict = str(config_dict)
  68. # print(f"config_dict:{config_dict}")
  69. # print(f"str_config_dict:{str_config_dict}")
  70. insert_sql = f""" insert into crawler_config(title, source, config) values("本山祝福小程序", "benshanzhufu", "{str_config_dict}") """
  71. MysqlHelper.update_values(log_type, crawler, insert_sql, env)
  72. @classmethod
  73. def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env):
  74. jixiangxingfu_sheetid = ['d9e9b1']
  75. for sheetid in jixiangxingfu_sheetid:
  76. xiaoniangao_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
  77. for i in range(1, len(xiaoniangao_sheet)):
  78. # for i in range(1, 5):
  79. if xiaoniangao_sheet[i][5] is None or xiaoniangao_sheet[i][7] is None:
  80. continue
  81. video_id = xiaoniangao_sheet[i][12].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace(
  82. "/info", "")
  83. if video_id == "None":
  84. continue
  85. video_id = int(video_id)
  86. out_user_id = "jixiangxingfu"
  87. platform = "吉祥幸福"
  88. strategy = "推荐榜爬虫策略"
  89. video_title = str(xiaoniangao_sheet[i][7])
  90. play_cnt = int(xiaoniangao_sheet[i][9].split("万")[0])*10000
  91. duration = str(xiaoniangao_sheet[i][10])
  92. width = int(xiaoniangao_sheet[i][11].split("*")[0])
  93. height = int(xiaoniangao_sheet[i][11].split("*")[1])
  94. cover_url = str(xiaoniangao_sheet[i][13])
  95. video_url = str(xiaoniangao_sheet[i][14])
  96. crawler_rule = json.dumps({})
  97. out_video_id = md5(video_title.encode('utf8')).hexdigest()
  98. # print(f"video_id:{video_id}, type:{type(video_id)}")
  99. # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}")
  100. # print(f"platform:{platform}, type:{type(platform)}")
  101. # print(f"strategy:{strategy}, type:{type(strategy)}")
  102. # print(f"video_title:{video_title}, type:{type(video_title)}")
  103. # print(f"cover_url:{cover_url}, type:{type(cover_url)}")
  104. # print(f"video_url:{video_url}, type:{type(video_url)}")
  105. # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}")
  106. select_sql = f""" select * from crawler_video where platform="{platform}" and video_url="{video_url}" """
  107. Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
  108. repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env)
  109. Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
  110. if repeat_video is not None and len(repeat_video) != 0:
  111. Common.logger(log_type, crawler).info(f"{video_title} 已存在数据库中\n")
  112. else:
  113. # 视频信息保存数据库
  114. insert_sql = f""" insert into crawler_video(video_id,
  115. out_user_id,
  116. platform,
  117. strategy,
  118. out_video_id,
  119. video_title,
  120. cover_url,
  121. video_url,
  122. duration,
  123. play_cnt,
  124. crawler_rule,
  125. width,
  126. height)
  127. values({video_id},
  128. "{out_user_id}",
  129. "{platform}",
  130. "{strategy}",
  131. "{out_video_id}",
  132. "{video_title}",
  133. "{cover_url}",
  134. "{video_url}",
  135. {duration},
  136. {play_cnt},
  137. '{crawler_rule}',
  138. {width},
  139. {height}) """
  140. Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
  141. MysqlHelper.update_values(log_type, crawler, insert_sql, env, action='')
  142. Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
  143. if __name__ == "__main__":
  144. # Insert.insert_config("insert", "benshanzhufu", "dev")
  145. # print(Insert.get_config("insert", "ganggangdouchuan", "filter", "dev"))
  146. # Insert.insert_video_from_feishu_to_mysql("insert-dev", "ganggangdouchuan", "dev")
  147. # Insert.insert_video_from_feishu_to_mysql("insert-dev", "jixiangxingfu", "dev")
  148. Insert.insert_video_from_feishu_to_mysql("insert-prod", "jixiangxingfu", "prod")
  149. pass