# insert_video_1.py
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2023/3/14
  4. import json
  5. import os
  6. import sys
  7. sys.path.append(os.getcwd())
  8. from common.common import Common
  9. from common.scheduling_db import MysqlHelper
  10. from common.feishu import Feishu
  11. class Insert:
  12. @classmethod
  13. def insert_video_from_feishu_to_mysql(cls, log_type, crawler, env, machine):
  14. xiaoniangao_sheetid_list = ['Wu0CeL']
  15. for sheetid in xiaoniangao_sheetid_list:
  16. xiaoniangao_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
  17. for i in range(1, len(xiaoniangao_sheet)):
  18. # for i in range(1, 3):
  19. if xiaoniangao_sheet[i][5] is None or xiaoniangao_sheet[i][9] is None:
  20. continue
  21. video_id = xiaoniangao_sheet[i][9].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace("/info", "")
  22. if video_id == "None":
  23. continue
  24. video_id = int(video_id)
  25. out_user_id = str(xiaoniangao_sheet[i][19])
  26. platform = "小年糕"
  27. strategy = "定向爬虫策略"
  28. out_video_id = str(xiaoniangao_sheet[i][7])
  29. video_title = str(xiaoniangao_sheet[i][8])
  30. cover_url = str(xiaoniangao_sheet[i][21])
  31. video_url = str(xiaoniangao_sheet[i][22])
  32. duration = int(xiaoniangao_sheet[i][14])
  33. publish_time = str(xiaoniangao_sheet[i][16]).replace("/", "-")
  34. play_cnt = int(xiaoniangao_sheet[i][10])
  35. like_cnt = int(xiaoniangao_sheet[i][12])
  36. share_cnt = int(xiaoniangao_sheet[i][13])
  37. # collection_cnt = 0
  38. comment_cnt = int(xiaoniangao_sheet[i][11])
  39. crawler_rule = json.dumps({"play_cnt": {"min": 500}, "duration": {"min": 40}, "publish_day": {"min": 3}})
  40. width = int(xiaoniangao_sheet[i][15].split("*")[0])
  41. height = int(xiaoniangao_sheet[i][15].split("*")[1])
  42. # print(f"video_id:{video_id}, type:{type(video_id)}")
  43. # print(f"user_id:{user_id}, type:{type(user_id)}")
  44. # print(f"out_user_id:{out_user_id}, type:{type(out_user_id)}")
  45. # print(f"platform:{platform}, type:{type(platform)}")
  46. # print(f"strategy:{strategy}, type:{type(strategy)}")
  47. # print(f"out_video_id:{out_video_id}, type:{type(out_video_id)}")
  48. # print(f"video_title:{video_title}, type:{type(video_title)}")
  49. # print(f"cover_url:{cover_url}, type:{type(cover_url)}")
  50. # print(f"video_url:{video_url}, type:{type(video_url)}")
  51. # print(f"duration:{duration}, type:{type(duration)}")
  52. # print(f"publish_time:{publish_time}, type:{type(publish_time)}")
  53. # print(f"play_cnt:{play_cnt}, type:{type(play_cnt)}")
  54. # print(f"like_cnt:{like_cnt}, type:{type(like_cnt)}")
  55. # print(f"share_cnt:{share_cnt}, type:{type(share_cnt)}")
  56. # print(f"comment_cnt:{comment_cnt}, type:{type(comment_cnt)}")
  57. # print(f"crawler_rule:{crawler_rule}, type:{type(crawler_rule)}")
  58. # print(f"width:{width}, type:{type(width)}")
  59. # print(f"height:{height}, type:{type(height)}\n")
  60. select_sql = f""" select * from crawler_video where platform="{platform}" and out_video_id="{out_video_id}" """
  61. Common.logger(log_type, crawler).info(f"select_sql:{select_sql}")
  62. repeat_video = MysqlHelper.get_values(log_type, crawler, select_sql, env, machine)
  63. Common.logger(log_type, crawler).info(f"repeat_video:{repeat_video}")
  64. if repeat_video is not None and len(repeat_video) != 0:
  65. Common.logger(log_type, crawler).info(f"{video_title} 已存在数据库中\n")
  66. else:
  67. # 视频信息保存数据库
  68. insert_sql = f""" insert into crawler_video(video_id,
  69. out_user_id,
  70. platform,
  71. strategy,
  72. out_video_id,
  73. video_title,
  74. cover_url,
  75. video_url,
  76. duration,
  77. publish_time,
  78. play_cnt,
  79. like_cnt,
  80. share_cnt,
  81. comment_cnt,
  82. crawler_rule,
  83. width,
  84. height)
  85. values({video_id},
  86. "{out_user_id}",
  87. "{platform}",
  88. "{strategy}",
  89. "{out_video_id}",
  90. "{video_title}",
  91. "{cover_url}",
  92. "{video_url}",
  93. {duration},
  94. "{publish_time}",
  95. {play_cnt},
  96. {like_cnt},
  97. {share_cnt},
  98. {comment_cnt},
  99. '{crawler_rule}',
  100. {width},
  101. {height}) """
  102. Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
  103. MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
  104. Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
  105. if __name__ == "__main__":
  106. # Insert.insert_video_from_feishu_to_mysql("insert-dev-follow", "xiaoniangao", "dev", "local")
  107. # Insert.insert_video_from_feishu_to_mysql("insert-dev-hour", "xiaoniangao", "dev", "local")
  108. # Insert.insert_video_from_feishu_to_mysql("insert-prod-follow", "xiaoniangao", "prod", "local")
  109. Insert.insert_video_from_feishu_to_mysql("insert-prod-1", "xiaoniangao", "prod", "local")
  110. pass