xiaoniangao_hour_scheduling.py

# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/3/15
import datetime
import json
import os
import random
import shutil
import sys
import time
from hashlib import md5
import requests
import urllib3
sys.path.append(os.getcwd())
from common.common import Common
from common.feishu import Feishu
from common.publish import Publish
from common.scheduling_db import MysqlHelper
from common.public import get_config_from_mysql, download_rule

proxies = {"http": None, "https": None}
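

# Hourly-ranking crawler for 小年糕 (Xiaoniangao): pulls the app's recommend feed,
# records per-video play counts in crawler_xiaoniangao_hour, refreshes them at
# 10:00 / 15:00 / 20:00, and downloads/publishes videos whose play count rises
# fast enough ("上升榜抓取策略").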
class XiaoniangaoHourScheduling:
    platform = "小年糕"
    words = "abcdefghijklmnopqrstuvwxyz0123456789"
    # Randomly generated pseudo device uid (UUID-like) and token, used when calling the profile API
    uid_token_dict = {
        "uid": f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}""",
        "token": "".join(random.sample(words, 32))
    }
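
    # Dedup helpers: each returns the number of existing MySQL rows for the given
    # out_video_id (crawler_video for published videos, crawler_xiaoniangao_hour for the hourly table).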
    @classmethod
    def repeat_video(cls, log_type, crawler, video_id, env):
        sql = f""" select * from crawler_video where platform="小年糕" and out_video_id="{video_id}"; """
        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
        return len(repeat_video)

    @classmethod
    def repeat_hour(cls, log_type, crawler, video_id, env):
        sql = f""" select * from crawler_xiaoniangao_hour where platform="小年糕" and out_video_id="{video_id}"; """
        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
        return len(repeat_video)

    # Fetch the recommend-feed video list
    @classmethod
    def get_videoList(cls, log_type, crawler, rule_dict, env):
        uid_token_dict = cls.uid_token_dict
        url = "https://kapi-xng-app.xiaoniangao.cn/v2/trends/recommend"
        payload = "{\"topic_name\":\"recommend\",\"ext\":{\"current_item\":0,\"items\":[]},\"tag_id\":0,\"refresh\":false,\"share_width\":300,\"share_height\":240,\"staggered_style\":0,\"qs\":\"imageMogr2\\/gravity\\/center\\/rotate\\/$\\/thumbnail\\/!750x500r\\/interlace\\/1\\/format\\/jpg\",\"topic_id\":2,\"h_qs\":\"imageMogr2\\/gravity\\/center\\/rotate\\/$\\/thumbnail\\/!80x80r\\/interlace\\/1\\/format\\/jpg\",\"log\":{\"net\":\"wifi\",\"product\":\"xng\",\"uid\":\"2F310D09-5E32-5985-8644-3BCB6920E76F\",\"brand\":\"iPad\",\"page\":\"\",\"session_id\":\"71C77648-3224-4083-894C-B6282131F286\",\"resolution\":\"750*1334\",\"pf\":\"4\",\"app_version\":\"1.22.5\",\"device\":\"iPad Pro (12.9-inch) (3rd generation)\",\"os_version\":\"15.7\",\"idfa\":\"\",\"channel\":\"ios_app_store\"},\"token\":\"\"}"
        headers = {
            'Host': 'kapi-xng-app.xiaoniangao.cn',
            'content-type': 'application/json; charset=utf-8',
            'accept': 'application/json',
            'authorization': 'PsrUTBCQ5G7UVZdgx+JxymPHcKU=',
            'verb': 'POST',
            'content-md5': '08fa0e6bf725fd6ef83c16d2ceb8a544',
            'x-b3-traceid': '45a6c5b4c471eecc',
            'accept-language': 'zh-Hans-CN;q=1.0',
            'date': 'Mon, 19 Jun 2023 09:47:40 GMT',
            'x-token-id': '',
            'x-signaturemethod': 'hmac-sha1',
            'user-agent': 'xngapp/1.22.5 (cn.xiaoniangao.xngapp; build:157; iOS 15.7.0) Alamofire/5.2.2'
        }
        urllib3.disable_warnings()
        r = requests.post(url=url, headers=headers, data=payload, proxies=proxies, verify=False)
        if 'data' not in r.text or r.status_code != 200:
            Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
            Common.logging(log_type, crawler, env, f"get_videoList:{r.text}\n")
            return
        elif "data" not in r.json():
            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()}\n")
            Common.logging(log_type, crawler, env, f"get_videoList:{r.json()}\n")
            return
        elif "list" not in r.json()["data"]:
            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}\n")
            Common.logging(log_type, crawler, env, f"get_videoList:{r.json()['data']}\n")
            return
        elif len(r.json()['data']['list']) == 0:
            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}\n")
            Common.logging(log_type, crawler, env, f"get_videoList:{r.json()['data']['list']}\n")
            return
        else:
            # Video list data
            feeds = r.json()["data"]["list"]
            for i in range(len(feeds)):
                try:
                    # Title: an emoji is randomly added at the beginning or end, or used to
                    # replace punctuation in the middle of the sentence
                    xiaoniangao_title = feeds[i].get("title", "").strip().replace("\n", "") \
                        .replace("/", "").replace("\r", "").replace("#", "") \
                        .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
                        .replace(":", "").replace("*", "").replace("?", "") \
                        .replace("?", "").replace('"', "").replace("<", "") \
                        .replace(">", "").replace("|", "").replace(" ", "") \
                        .replace('"', '').replace("'", '')
                    # Pick one emoji/symbol at random
                    emoji = random.choice(get_config_from_mysql(log_type, crawler, env, "emoji"))
                    # Final title: randomly choose between [emoji+title, title+emoji]
                    video_title = random.choice([f"{emoji}{xiaoniangao_title}", f"{xiaoniangao_title}{emoji}"])
                    # Publish time
                    publish_time_stamp = int(int(feeds[i].get("t", 0)) / 1000)
                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
                    # User name / avatar
                    user_name = feeds[i].get("user", {}).get("nick", "").strip().replace("\n", "") \
                        .replace("/", "").replace(" ", "") \
                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
                    video_dict = {
                        "video_title": video_title,
                        "video_id": feeds[i].get("vid", ""),
                        "duration": int(feeds[i].get("du", 0) / 1000),
                        "play_cnt": feeds[i].get("play_pv", 0),
                        "like_cnt": feeds[i].get("favor", {}).get("total", 0),
                        "comment_cnt": feeds[i].get("comment_count", 0),
                        "share_cnt": feeds[i].get("share", 0),
                        "user_name": user_name,
                        "publish_time_stamp": publish_time_stamp,
                        "publish_time_str": publish_time_str,
                        "video_width": int(feeds[i].get("vw", 0)),
                        "video_height": int(feeds[i].get("vh", 0)),
                        "avatar_url": feeds[i].get("user", {}).get("hurl", ""),
                        "profile_id": feeds[i]["id"],
                        "profile_mid": feeds[i]["user"]["mid"],
                        "cover_url": feeds[i].get("url", ""),
                        "video_url": feeds[i].get("v_url", ""),
                        "session": f"xiaoniangao-hour-{int(time.time())}"
                    }
                    for k, v in video_dict.items():
                        Common.logger(log_type, crawler).info(f"{k}:{v}")
                    Common.logging(log_type, crawler, env, f"{video_dict}")
                    # Filter out invalid videos
                    if video_title == "" or video_dict["video_id"] == "" or video_dict["video_url"] == "":
                        Common.logger(log_type, crawler).warning("无效视频\n")
                        Common.logging(log_type, crawler, env, "无效视频\n")
                    # Filter by the basic crawl rules
                    elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                        Common.logger(log_type, crawler).info("不满足抓取规则\n")
                        Common.logging(log_type, crawler, env, "不满足抓取规则\n")
                    # Filter-word check
                    elif any(str(word) if str(word) in video_dict["video_title"] else False
                             for word in get_config_from_mysql(log_type=log_type,
                                                               source=crawler,
                                                               env=env,
                                                               text="filter",
                                                               action="")) is True:
                        Common.logger(log_type, crawler).info('已中过滤词\n')
                        Common.logging(log_type, crawler, env, '已中过滤词\n')
                    elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                        Common.logger(log_type, crawler).info('视频已下载\n')
                        Common.logging(log_type, crawler, env, '视频已下载\n')
                    else:
                        # Write the record into the hourly feeds table crawler_xiaoniangao_hour
                        insert_sql = f""" insert into crawler_xiaoniangao_hour(profile_id,
                                            profile_mid,
                                            platform,
                                            out_video_id,
                                            video_title,
                                            user_name,
                                            cover_url,
                                            video_url,
                                            duration,
                                            publish_time,
                                            play_cnt,
                                            crawler_time_stamp,
                                            crawler_time)
                                            values({video_dict["profile_id"]},
                                            {video_dict["profile_mid"]},
                                            "{cls.platform}",
                                            "{video_dict["video_id"]}",
                                            "{video_title}",
                                            "{user_name}",
                                            "{video_dict["cover_url"]}",
                                            "{video_dict["video_url"]}",
                                            {video_dict["duration"]},
                                            "{publish_time_str}",
                                            {video_dict["play_cnt"]},
                                            {int(time.time())},
                                            "{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))}"
                                            )"""
                        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
                        MysqlHelper.update_values(log_type, crawler, insert_sql, env)
                        Common.logger(log_type, crawler).info('视频信息写入小时级数据库成功!\n')
                        Common.logging(log_type, crawler, env, '视频信息写入小时级数据库成功!\n')
                except Exception as e:
                    Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
                    Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
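
    # Fetch the latest stats for a single video via the profile API
    # (used when refreshing the hourly ranking at 10:00 / 15:00 / 20:00).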
    @classmethod
    def get_video_info(cls, log_type, crawler, p_id, p_mid, v_title, v_id):
        uid_token_dict = cls.uid_token_dict
        url = "https://kapi.xiaoniangao.cn/profile/get_profile_by_id"
        headers = {
            "x-b3-traceid": '1c403a4aa72e3c',
            "X-Token-Id": 'ab619e96d801f1567388629260aa68ec-1202200806',
            "uid": uid_token_dict['uid'],
            "content-type": "application/json",
            "Accept-Encoding": "gzip,compress,br,deflate",
            "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
                          ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
                          'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
            "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/624/page-frame.html'
        }
        data = {
            "play_src": "1",
            "profile_id": int(p_id),
            "profile_mid": int(p_mid),
            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/"
                  "!400x400r/crop/400x400/interlace/1/format/jpg",
            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail"
                    "/!80x80r/crop/80x80/interlace/1/format/jpg",
            "share_width": 625,
            "share_height": 500,
            "no_comments": True,
            "no_follow": True,
            "vid": v_id,
            "hot_l1_comment": True,
            "token": uid_token_dict['token'],
            "uid": uid_token_dict['uid'],
            "proj": "ma",
            "wx_ver": "8.0.20",
            "code_ver": "3.62.0",
            "log_common_params": {
                "e": [{
                    "data": {
                        "page": "dynamicSharePage"
                    }
                }],
                "ext": {
                    "brand": "iPhone",
                    "device": "iPhone 11",
                    "os": "iOS 14.7.1",
                    "weixinver": "8.0.20",
                    "srcver": "2.24.3",
                    "net": "wifi",
                    "scene": "1089"
                },
                "pj": "1",
                "pf": "2",
                "session_id": "7bcce313-b57d-4305-8d14-6ebd9a1bad29"
            }
        }
        urllib3.disable_warnings()
        r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
        if r.status_code != 200 or 'data' not in r.text:
            Common.logger(log_type, crawler).warning(f"get_videoInfo:{r.text}\n")
        else:
            hour_play_cnt = r.json()["data"]["play_pv"]
            hour_cover_url = r.json()["data"]["url"]
            hour_video_url = r.json()["data"]["v_url"]
            hour_video_duration = r.json()["data"]["du"]
            hour_video_comment_cnt = r.json()["data"]["comment_count"]
            hour_video_like_cnt = r.json()["data"]["favor"]["total"]
            hour_video_share_cnt = r.json()["data"]["share"]
            hour_video_width = r.json()["data"]["w"]
            hour_video_height = r.json()["data"]["h"]
            hour_video_send_time = r.json()["data"]["t"]
            publish_time_stamp = int(int(hour_video_send_time) / 1000)
            publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
            hour_user_name = r.json()["data"]["user"]["nick"]
            hour_head_url = r.json()["data"]["user"]["hurl"]
            video_info_dict = {
                "video_id": v_id,
                "video_title": v_title,
                "duration": hour_video_duration,
                "play_cnt": hour_play_cnt,
                "like_cnt": hour_video_like_cnt,
                "comment_cnt": hour_video_comment_cnt,
                "share_cnt": hour_video_share_cnt,
                "user_name": hour_user_name,
                "publish_time_stamp": publish_time_stamp,
                "publish_time_str": publish_time_str,
                "video_width": hour_video_width,
                "video_height": hour_video_height,
                "avatar_url": hour_head_url,
                "profile_id": p_id,
                "profile_mid": p_mid,
                "cover_url": hour_cover_url,
                "video_url": hour_video_url,
                "session": f"xiaoniangao-hour-{int(time.time())}"
            }
            return video_info_dict
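
    # Hourly refresh: within the first 10 minutes of 10:00, 15:00 and 20:00 the current
    # play count is stored into ten_play_cnt / fifteen_play_cnt / twenty_play_cnt, after
    # which download_publish() decides whether the rise is large enough to download.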
    # Update hourly ranking data
    @classmethod
    def update_videoList(cls, log_type, crawler, rule_dict, our_uid, env):
        """
        Update the hourly ranking data
        """
        # Only refresh records crawled within the last 3 days
        before_yesterday = (datetime.date.today() + datetime.timedelta(days=-3)).strftime("%Y-%m-%d %H:%M:%S")
        update_time_stamp = int(time.mktime(time.strptime(before_yesterday, "%Y-%m-%d %H:%M:%S")))
        select_sql = f""" select * from crawler_xiaoniangao_hour where crawler_time_stamp>={update_time_stamp} GROUP BY out_video_id DESC """
        update_video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env)
        if len(update_video_list) == 0:
            Common.logger(log_type, crawler).info("暂无需要更新的小时榜数据\n")
            Common.logging(log_type, crawler, env, "暂无需要更新的小时榜数据\n")
            return
        for update_video_info in update_video_list:
            try:
                profile_id = update_video_info["profile_id"]
                profile_mid = update_video_info["profile_mid"]
                video_title = update_video_info["video_title"]
                video_id = update_video_info["out_video_id"]
                if datetime.datetime.now().hour == 10 and datetime.datetime.now().minute <= 10:
                    video_info_dict = cls.get_video_info(log_type=log_type,
                                                         crawler=crawler,
                                                         p_id=profile_id,
                                                         p_mid=profile_mid,
                                                         v_title=video_title,
                                                         v_id=video_id)
                    ten_play_cnt = video_info_dict['play_cnt']
                    Common.logger(log_type, crawler).info(f"ten_play_cnt:{ten_play_cnt}")
                    Common.logging(log_type, crawler, env, f"ten_play_cnt:{ten_play_cnt}")
                    update_sql = f""" update crawler_xiaoniangao_hour set ten_play_cnt={ten_play_cnt} WHERE out_video_id="{video_id}"; """
                    # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                    MysqlHelper.update_values(log_type, crawler, update_sql, env)
                    cls.download_publish(log_type=log_type,
                                         crawler=crawler,
                                         video_info_dict=video_info_dict,
                                         rule_dict=rule_dict,
                                         update_video_info=update_video_info,
                                         our_uid=our_uid,
                                         env=env)
                elif datetime.datetime.now().hour == 15 and datetime.datetime.now().minute <= 10:
                    video_info_dict = cls.get_video_info(log_type=log_type,
                                                         crawler=crawler,
                                                         p_id=profile_id,
                                                         p_mid=profile_mid,
                                                         v_title=video_title,
                                                         v_id=video_id)
                    fifteen_play_cnt = video_info_dict['play_cnt']
                    Common.logger(log_type, crawler).info(f"fifteen_play_cnt:{fifteen_play_cnt}")
                    Common.logging(log_type, crawler, env, f"fifteen_play_cnt:{fifteen_play_cnt}")
                    update_sql = f""" update crawler_xiaoniangao_hour set fifteen_play_cnt={fifteen_play_cnt} WHERE out_video_id="{video_id}"; """
                    # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                    MysqlHelper.update_values(log_type, crawler, update_sql, env)
                    cls.download_publish(log_type=log_type,
                                         crawler=crawler,
                                         video_info_dict=video_info_dict,
                                         rule_dict=rule_dict,
                                         update_video_info=update_video_info,
                                         our_uid=our_uid,
                                         env=env)
                elif datetime.datetime.now().hour == 20 and datetime.datetime.now().minute <= 10:
                    video_info_dict = cls.get_video_info(log_type=log_type,
                                                         crawler=crawler,
                                                         p_id=profile_id,
                                                         p_mid=profile_mid,
                                                         v_title=video_title,
                                                         v_id=video_id)
                    twenty_play_cnt = video_info_dict['play_cnt']
                    Common.logger(log_type, crawler).info(f"twenty_play_cnt:{twenty_play_cnt}")
                    Common.logging(log_type, crawler, env, f"twenty_play_cnt:{twenty_play_cnt}")
                    update_sql = f""" update crawler_xiaoniangao_hour set twenty_play_cnt={twenty_play_cnt} WHERE out_video_id="{video_id}"; """
                    # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                    MysqlHelper.update_values(log_type, crawler, update_sql, env)
                    cls.download_publish(log_type=log_type,
                                         crawler=crawler,
                                         video_info_dict=video_info_dict,
                                         rule_dict=rule_dict,
                                         update_video_info=update_video_info,
                                         our_uid=our_uid,
                                         env=env)
                else:
                    pass
            except Exception as e:
                Common.logger(log_type, crawler).error(f'更新{update_video_info["video_title"]}时异常:{e}\n')
                Common.logging(log_type, crawler, env, f'更新{update_video_info["video_title"]}时异常:{e}\n')
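
    # Download the video and cover, publish via Publish.upload_and_publish,
    # then record the result in crawler_video and the Feishu sheet.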
    @classmethod
    def download(cls, log_type, crawler, video_info_dict, rule_dict, our_uid, env):
        # Download the video
        Common.download_method(log_type=log_type, crawler=crawler, text="video", title=video_info_dict["video_title"],
                               url=video_info_dict["video_url"])
        md_title = md5(video_info_dict['video_title'].encode('utf8')).hexdigest()
        try:
            if os.path.getsize(f"./{crawler}/videos/{md_title}/video.mp4") == 0:
                # Delete the video folder
                shutil.rmtree(f"./{crawler}/videos/{md_title}")
                Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                return
        except FileNotFoundError:
            # Delete the video folder
            shutil.rmtree(f"./{crawler}/videos/{md_title}")
            Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
            return
        # Download the cover image
        Common.download_method(log_type=log_type, crawler=crawler, text="cover", title=video_info_dict["video_title"],
                               url=video_info_dict["cover_url"])
        # Save the video info to "./videos/{download_video_title}/info.txt"
        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_info_dict)
        # Upload the video
        Common.logger(log_type, crawler).info("开始上传视频...")
        Common.logging(log_type, crawler, env, "开始上传视频...")
        if env == "dev":
            oss_endpoint = "out"
            our_video_id = Publish.upload_and_publish(log_type=log_type,
                                                      crawler=crawler,
                                                      strategy="上升榜抓取策略",
                                                      our_uid=our_uid,
                                                      env=env,
                                                      oss_endpoint=oss_endpoint)
            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
        else:
            oss_endpoint = "inner"
            our_video_id = Publish.upload_and_publish(log_type=log_type,
                                                      crawler=crawler,
                                                      strategy="上升榜抓取策略",
                                                      our_uid=our_uid,
                                                      env=env,
                                                      oss_endpoint=oss_endpoint)
            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
        if our_video_id is None:
            try:
                # Publishing failed: delete the video folder
                shutil.rmtree(f"./{crawler}/videos/{md_title}")
                return
            except FileNotFoundError:
                return
        insert_sql = f""" insert into crawler_video(video_id,
                            out_user_id,
                            platform,
                            strategy,
                            out_video_id,
                            video_title,
                            cover_url,
                            video_url,
                            duration,
                            publish_time,
                            play_cnt,
                            crawler_rule,
                            width,
                            height)
                            values({our_video_id},
                            "{video_info_dict['profile_id']}",
                            "{cls.platform}",
                            "上升榜抓取策略",
                            "{video_info_dict['video_id']}",
                            "{video_info_dict['video_title']}",
                            "{video_info_dict['cover_url']}",
                            "{video_info_dict['video_url']}",
                            {int(video_info_dict['duration'])},
                            "{video_info_dict['publish_time_str']}",
                            {int(video_info_dict['play_cnt'])},
                            '{json.dumps(rule_dict)}',
                            {int(video_info_dict['video_width'])},
                            {int(video_info_dict['video_height'])}) """
        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
        MysqlHelper.update_values(log_type, crawler, insert_sql, env)
        Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
        Common.logging(log_type, crawler, env, '视频信息插入数据库成功!')
        # Write the video info to Feishu
        Feishu.insert_columns(log_type, crawler, "yatRv2", "ROWS", 1, 2)
        # Video-ID worksheet: write the data into the first row
        upload_time = int(time.time())
        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
                   "上升榜抓取策略",
                   str(video_info_dict['video_id']),
                   str(video_info_dict['video_title']),
                   our_video_link,
                   video_info_dict['play_cnt'],
                   video_info_dict['comment_cnt'],
                   video_info_dict['like_cnt'],
                   video_info_dict['share_cnt'],
                   video_info_dict['duration'],
                   f"{video_info_dict['video_width']}*{video_info_dict['video_height']}",
                   str(video_info_dict['publish_time_str'].replace("-", "/")),
                   str(video_info_dict['user_name']),
                   str(video_info_dict['profile_id']),
                   str(video_info_dict['profile_mid']),
                   str(video_info_dict['avatar_url']),
                   str(video_info_dict['cover_url']),
                   str(video_info_dict['video_url'])]]
        time.sleep(1)
        Feishu.update_values(log_type, crawler, "yatRv2", "F2:Z2", values)
        Common.logger(log_type, crawler).info('视频信息写入飞书成功\n')
        Common.logging(log_type, crawler, env, '视频信息写入飞书成功\n')

    # Download / upload decision
    @classmethod
    def download_publish(cls, log_type, crawler, video_info_dict, rule_dict, update_video_info, our_uid, env):
        if cls.repeat_video(log_type, crawler, video_info_dict["video_id"], env) != 0:
            Common.logger(log_type, crawler).info('视频已下载\n')
            Common.logging(log_type, crawler, env, '视频已下载\n')
        # Play count >= 30000: download directly
        elif int(video_info_dict["play_cnt"]) >= 30000:
            Common.logger(log_type, crawler).info(f"播放量:{video_info_dict['play_cnt']} >= 30000,满足下载规则,开始下载视频")
            Common.logging(log_type, crawler, env, f"播放量:{video_info_dict['play_cnt']} >= 30000,满足下载规则,开始下载视频")
            cls.download(log_type=log_type,
                         crawler=crawler,
                         video_info_dict=video_info_dict,
                         rule_dict=rule_dict,
                         our_uid=our_uid,
                         env=env)
        # Rising-ranking logic: any single time slot rises by >= 3000,
        # or any two time slots each rise by >= 1000
        elif int(update_video_info['ten_play_cnt']) >= 3000 or int(
                update_video_info['fifteen_play_cnt']) >= 3000 or int(update_video_info['twenty_play_cnt']) >= 3000:
            Common.logger(log_type, crawler).info(f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
            Common.logging(log_type, crawler, env, f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
            Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
            Common.logging(log_type, crawler, env, "满足下载规则,开始下载视频")
            cls.download(log_type=log_type,
                         crawler=crawler,
                         video_info_dict=video_info_dict,
                         rule_dict=rule_dict,
                         our_uid=our_uid,
                         env=env)
        elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['fifteen_play_cnt']) >= 1000:
            Common.logger(log_type, crawler).info(f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
            Common.logging(log_type, crawler, env, f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
            Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
            Common.logging(log_type, crawler, env, "满足下载规则,开始下载视频")
            cls.download(log_type=log_type,
                         crawler=crawler,
                         video_info_dict=video_info_dict,
                         rule_dict=rule_dict,
                         our_uid=our_uid,
                         env=env)
        elif int(update_video_info['fifteen_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
            Common.logger(log_type, crawler).info(
                f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
            Common.logging(log_type, crawler, env, f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
            Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
            Common.logging(log_type, crawler, env, "满足下载规则,开始下载视频")
            cls.download(log_type=log_type,
                         crawler=crawler,
                         video_info_dict=video_info_dict,
                         rule_dict=rule_dict,
                         our_uid=our_uid,
                         env=env)
        elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
            Common.logger(log_type, crawler).info(
                f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
            Common.logging(log_type, crawler, env, f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
            Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
            Common.logging(log_type, crawler, env, "满足下载规则,开始下载视频")
            cls.download(log_type=log_type,
                         crawler=crawler,
                         video_info_dict=video_info_dict,
                         rule_dict=rule_dict,
                         our_uid=our_uid,
                         env=env)
        else:
            Common.logger(log_type, crawler).info("上升量不满足下载规则")
            Common.logging(log_type, crawler, env, "上升量不满足下载规则")
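

# Manual test entry: print the filter-word config read from MySQL.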
if __name__ == "__main__":
    print(get_config_from_mysql(log_type='hour', source='xiaoniangao', env='dev', text='filter'))
    pass