videos_filter.py 37 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812
  1. import time
  2. import json
  3. import traceback
  4. import ast
  5. from datetime import date, timedelta, datetime
  6. from region_rule_rank_h import region_code
  7. from utils import filter_video_status, send_msg_to_feishu, filter_video_status_app
  8. from db_helper import RedisHelper
  9. from config import set_config
  10. from log import Log
  11. config_, env = set_config()
  12. log_ = Log()
  13. def filter_position_videos():
  14. """按位置排序视频过滤"""
  15. log_.info("position videos filter start...")
  16. position_key_list = [config_.RECALL_POSITION1_KEY_NAME, config_.RECALL_POSITION2_KEY_NAME]
  17. redis_helper = RedisHelper()
  18. for key_name in position_key_list:
  19. position = key_name.split('.')[-1]
  20. log_.info("position = {}".format(position))
  21. # 获取数据
  22. position_videos = redis_helper.get_data_from_redis(key_name=key_name)
  23. if position_videos is None:
  24. log_.info('position {} videos is None!'.format(position))
  25. continue
  26. else:
  27. # 过滤
  28. position_video_ids = [int(video_id) for video_id in ast.literal_eval(position_videos)]
  29. filter_video_ids = filter_video_status(video_ids=position_video_ids)
  30. # 重新写入redis
  31. redis_helper.set_data_to_redis(key_name=key_name,
  32. value=str(filter_video_ids),
  33. expire_time=30 * 3600)
  34. log_.info('position {} videos filter end!'.format(position))
  35. log_.info("position videos filter end!")
  36. def filter_relevant_videos():
  37. """运营强插相关推荐视频过滤"""
  38. log_.info("relevant videos with op filter filter start...")
  39. # 读取需要过滤的头部视频id
  40. redis_helper = RedisHelper()
  41. head_videos = redis_helper.get_data_from_set(key_name=config_.RELEVANT_TOP_VIDEOS_KEY_NAME)
  42. if head_videos is None or len(head_videos) == 0:
  43. log_.info("relevant videos with op filter end! head_videos = {}".format(head_videos))
  44. return
  45. # 过滤
  46. remove_head_vids = []
  47. for head_vid in head_videos:
  48. key_name = '{}{}'.format(config_.RELEVANT_VIDEOS_WITH_OP_KEY_NAME, head_vid)
  49. # 头部视频 对应的key不存在时,将head_vid移除对应redis
  50. if not redis_helper.key_exists(key_name=key_name):
  51. remove_head_vids.append(head_vid)
  52. log_.info('head_vid = {} relevant redis key not exist!'.format(head_vid))
  53. continue
  54. # 获取头部视频对应的相关视频
  55. relevant_videos = redis_helper.get_data_from_redis(key_name=key_name)
  56. # 该视频没有指定的相关性视频,将head_vid移除对应redis
  57. if relevant_videos is None:
  58. remove_head_vids.append(head_vid)
  59. log_.info('head_vid = {} not have relevant videos!'.format(head_vid))
  60. continue
  61. # 过滤
  62. relevant_videos = json.loads(relevant_videos)
  63. relevant_video_ids = [int(item['recommend_vid']) for item in relevant_videos]
  64. filtered_videos = filter_video_status(video_ids=relevant_video_ids)
  65. # 保留可推荐 且生效中 的视频
  66. relevant_videos_new = [
  67. item for item in relevant_videos
  68. if int(item['recommend_vid']) in filtered_videos and int(item['finish_time']) > int(time.time())
  69. ]
  70. # 过滤后没有符合的视频,将head_vid移除对应redis,删除对应的相关推荐的key
  71. if len(relevant_videos_new) == 0:
  72. remove_head_vids.append(head_vid)
  73. redis_helper.del_keys(key_name=key_name)
  74. log_.info('head_vid = {} filtered finished! new relevant videos count = {}'.format(
  75. head_vid, len(relevant_videos_new)))
  76. continue
  77. # 重新写入redis
  78. # 以最晚结束的视频的结束时间 - 当前时间 + 5s 作为key的过期时间
  79. finish_time_list = [item['finish_time'] for item in relevant_videos_new]
  80. expire_time = max(finish_time_list) - int(time.time()) + 5
  81. if expire_time <= 0:
  82. log_.info('head_vid = {} expire_time <= 0!'.format(head_vid))
  83. continue
  84. # 存入redis
  85. redis_helper.set_data_to_redis(key_name=key_name,
  86. value=json.dumps(relevant_videos_new),
  87. expire_time=expire_time)
  88. log_.info('head_vid = {} filtered finished! new relevant videos count = {}'.format(
  89. head_vid, len(relevant_videos_new)))
  90. # 将需要移除的头部视频id进行移除
  91. if len(remove_head_vids) == 0:
  92. log_.info('head videos remove finished! remove_head_vids = {}'.format(remove_head_vids))
  93. log_.info("relevant videos with op filter end!")
  94. return
  95. redis_helper.remove_value_from_set(key_name=config_.RELEVANT_TOP_VIDEOS_KEY_NAME, values=tuple(remove_head_vids))
  96. log_.info('head videos remove finished! remove_head_vids = {}'.format(remove_head_vids))
  97. log_.info("relevant videos with op filter end!")
  98. def filter_rov_pool(app_type=None):
  99. """ROV召回池视频过滤"""
  100. log_.info("rov recall pool filter start ...")
  101. # 拼接redis-key
  102. if app_type is None:
  103. key_name, _ = get_pool_redis_key(pool_type='rov')
  104. else:
  105. log_.info("appType = {}".format(app_type))
  106. key_name, _ = get_pool_redis_key(pool_type='rov', app_type=app_type)
  107. # 获取视频
  108. redis_helper = RedisHelper()
  109. data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
  110. if data is None:
  111. log_.info("data is None")
  112. log_.info("rov recall pool filter end!")
  113. return
  114. # 过滤
  115. video_ids = [int(video_id) for video_id in data]
  116. if app_type == config_.APP_TYPE['APP']:
  117. filtered_result = filter_video_status_app(video_ids=video_ids)
  118. else:
  119. filtered_result = filter_video_status(video_ids=video_ids)
  120. # 求差集,获取需要过滤掉的视频,并从redis中移除
  121. filter_videos = set(video_ids) - set(filtered_result)
  122. log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
  123. len(filtered_result),
  124. len(filter_videos)))
  125. log_.info({'key_name': key_name, 'filter_videos': filter_videos})
  126. if len(filter_videos) == 0:
  127. log_.info("rov recall pool filter end!")
  128. return
  129. redis_helper.remove_value_from_zset(key_name=key_name, value=list(filter_videos))
  130. log_.info("rov recall pool filter end!")
  131. def filter_flow_pool():
  132. """流量池视频过滤"""
  133. log_.info("flow pool filter start ...")
  134. app_type_list = [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]
  135. for _, app_type in config_.APP_TYPE.items():
  136. log_.info('app_type {} videos filter start...'.format(app_type))
  137. if app_type in app_type_list:
  138. filter_flow_pool_18_19(app_type=app_type)
  139. else:
  140. # 拼接redis-key
  141. key_name = get_pool_redis_key(pool_type='flow', app_type=app_type)
  142. # 获取视频
  143. redis_helper = RedisHelper()
  144. data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
  145. if data is None:
  146. log_.info("data is None")
  147. log_.info("app_type {} videos filter end!".format(app_type))
  148. continue
  149. # videoId与flowPool做mapping
  150. video_ids = []
  151. mapping = {}
  152. for video in data:
  153. video_id, flow_pool = video.split('-')
  154. video_id = int(video_id)
  155. if video_id not in video_ids:
  156. video_ids.append(video_id)
  157. mapping[video_id] = [flow_pool]
  158. else:
  159. mapping[video_id].append(flow_pool)
  160. # 过滤
  161. if len(video_ids) == 0:
  162. log_.info("data size = {}, video_ids size = {}, data = {}".format(len(data), len(video_ids), data))
  163. log_.info("app_type {} videos filter end!".format(app_type))
  164. continue
  165. if app_type == config_.APP_TYPE['APP']:
  166. filtered_result = filter_video_status_app(video_ids=video_ids)
  167. else:
  168. filtered_result = filter_video_status(video_ids=video_ids)
  169. # 求差集,获取需要过滤掉的视频,并从redis中移除
  170. filter_videos = set(video_ids) - set(filtered_result)
  171. log_.info("data size = {}, video_ids size = {}, filtered size = {}, filter sizer = {}".format(
  172. len(data), len(video_ids), len(filtered_result), len(filter_videos)))
  173. # 移除
  174. if len(filter_videos) == 0:
  175. log_.info("app_type {} videos filter end!".format(app_type))
  176. continue
  177. remove_videos = ['{}-{}'.format(video_id, flow_pool)
  178. for video_id in filter_videos
  179. for flow_pool in mapping[video_id]]
  180. redis_helper.remove_value_from_zset(key_name=key_name, value=remove_videos)
  181. log_.info("app_type {} videos filter end!".format(app_type))
  182. log_.info("flow pool filter end!")
  183. def filter_flow_pool_18_19(app_type):
  184. """流量池视频过滤"""
  185. log_.info('app_type {} videos filter start...'.format(app_type))
  186. # 拼接redis-key
  187. key_name = get_pool_redis_key(pool_type='flow', app_type=app_type)
  188. # 获取视频
  189. redis_helper = RedisHelper()
  190. data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
  191. if data is None:
  192. log_.info("data is None")
  193. log_.info("app_type {} videos filter end!".format(app_type))
  194. return
  195. video_ids = [int(video_id) for video_id in data]
  196. # 过滤
  197. if len(video_ids) == 0:
  198. log_.info("data size = {}, video_ids size = {}, data = {}".format(len(data), len(video_ids), data))
  199. log_.info("app_type {} videos filter end!".format(app_type))
  200. return
  201. filtered_result = filter_video_status(video_ids=video_ids)
  202. # 求差集,获取需要过滤掉的视频,并从redis中移除
  203. filter_videos = set(video_ids) - set(filtered_result)
  204. log_.info("data size = {}, video_ids size = {}, filtered size = {}, filter sizer = {}".format(
  205. len(data), len(video_ids), len(filtered_result), len(filter_videos)))
  206. log_.info({'key_name': key_name, 'filter_videos': filter_videos})
  207. # 移除
  208. if len(filter_videos) == 0:
  209. log_.info("app_type {} videos filter end!".format(app_type))
  210. return
  211. redis_helper.remove_value_from_zset(key_name=key_name, value=filter_videos)
  212. log_.info("app_type {} videos filter end!".format(app_type))
  213. log_.info("flow pool filter end!")
  214. def filter_bottom():
  215. """兜底视频过滤"""
  216. log_.info("bottom videos filter start ...")
  217. # 获取视频
  218. redis_helper = RedisHelper()
  219. data = redis_helper.get_data_zset_with_index(key_name=config_.BOTTOM_KEY_NAME, start=0, end=-1)
  220. if data is None:
  221. log_.info("data is None")
  222. log_.info("bottom videos filter end!")
  223. return
  224. # 过滤
  225. video_ids = [int(video_id) for video_id in data]
  226. filtered_result = filter_video_status(video_ids=video_ids)
  227. # 求差集,获取需要过滤掉的视频,并从redis中移除
  228. filter_videos = set(video_ids) - set(filtered_result)
  229. log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
  230. len(filtered_result),
  231. len(filter_videos)))
  232. if len(filter_videos) == 0:
  233. log_.info("bottom videos filter end!")
  234. return
  235. redis_helper.remove_value_from_zset(key_name=config_.BOTTOM_KEY_NAME, value=list(filter_videos))
  236. log_.info("bottom videos filter end!")
  237. def filter_rov_updated():
  238. """修改过ROV的视频过滤"""
  239. log_.info("update rov videos filter start ...")
  240. # 获取视频
  241. redis_helper = RedisHelper()
  242. data = redis_helper.get_data_zset_with_index(key_name=config_.UPDATE_ROV_KEY_NAME, start=0, end=-1)
  243. if data is None:
  244. log_.info("data is None")
  245. log_.info("update rov videos filter end!")
  246. return
  247. # 过滤
  248. video_ids = [int(video_id) for video_id in data]
  249. filtered_result = filter_video_status(video_ids=video_ids)
  250. # 求差集,获取需要过滤掉的视频,并从redis中移除
  251. filter_videos = set(video_ids) - set(filtered_result)
  252. log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
  253. len(filtered_result),
  254. len(filter_videos)))
  255. log_.info({'key_name': config_.UPDATE_ROV_KEY_NAME, 'filter_videos': filter_videos})
  256. if len(filter_videos) == 0:
  257. log_.info("update rov videos filter end!")
  258. return
  259. redis_helper.remove_value_from_zset(key_name=config_.UPDATE_ROV_KEY_NAME, value=list(filter_videos))
  260. log_.info("update rov videos filter end!")
  261. def filter_rov_updated_app():
  262. """修改过ROV的视频过滤-app推荐状态过滤"""
  263. log_.info("update rov videos app filter start ...")
  264. # 获取视频
  265. redis_helper = RedisHelper()
  266. data = redis_helper.get_data_zset_with_index(key_name=config_.UPDATE_ROV_KEY_NAME_APP, start=0, end=-1)
  267. if data is None:
  268. log_.info("data is None")
  269. log_.info("update rov videos app filter end!")
  270. return
  271. # 过滤
  272. video_ids = [int(video_id) for video_id in data]
  273. filtered_result = filter_video_status_app(video_ids=video_ids)
  274. # 求差集,获取需要过滤掉的视频,并从redis中移除
  275. filter_videos = set(video_ids) - set(filtered_result)
  276. log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
  277. len(filtered_result),
  278. len(filter_videos)))
  279. if len(filter_videos) == 0:
  280. log_.info("update rov videos app filter end!")
  281. return
  282. redis_helper.remove_value_from_zset(key_name=config_.UPDATE_ROV_KEY_NAME_APP, value=list(filter_videos))
  283. log_.info("update rov videos app filter end!")
  284. def get_pool_redis_key(pool_type, app_type=None):
  285. """
  286. 拼接key
  287. :param pool_type: type-string {'rov': rov召回池, 'flow': 流量池}
  288. :param app_type: 产品标识
  289. :return: key_name
  290. """
  291. redis_helper = RedisHelper()
  292. if pool_type == 'rov':
  293. # appType = 6
  294. if app_type == config_.APP_TYPE['SHORT_VIDEO']:
  295. # 获取当前所在小时
  296. redis_date = datetime.now().hour
  297. # 判断热度列表是否更新,未更新则使用前一小时的热度列表
  298. key_name = '{}{}.{}'.format(config_.RECALL_KEY_NAME_PREFIX_APP_TYPE, app_type, redis_date)
  299. if redis_helper.key_exists(key_name):
  300. return key_name, redis_date
  301. else:
  302. if redis_date == 0:
  303. redis_date = 23
  304. else:
  305. redis_date = redis_date - 1
  306. key_name = '{}{}.{}'.format(config_.RECALL_KEY_NAME_PREFIX_APP_TYPE, app_type, redis_date)
  307. return key_name, redis_date
  308. # appType: [18, 19]
  309. elif app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
  310. key_name_prefix = f'{config_.RECALL_KEY_NAME_PREFIX_APP_TYPE}{app_type}.'
  311. now_date = datetime.today().strftime('%Y%m%d')
  312. now_h = datetime.now().hour
  313. key_name = f"{key_name_prefix}{now_date}.{now_h}"
  314. if redis_helper.key_exists(key_name):
  315. return key_name, now_h
  316. else:
  317. if now_h == 0:
  318. redis_h = 23
  319. redis_date = (datetime.today() - timedelta(days=1)).strftime('%Y%m%d')
  320. else:
  321. redis_h = now_h - 1
  322. redis_date = now_date
  323. key_name = f"{key_name_prefix}{redis_date}.{redis_h}"
  324. return key_name, redis_h
  325. else:
  326. # appType = 13 票圈视频app
  327. if app_type == config_.APP_TYPE['APP']:
  328. key_name_prefix = config_.RECALL_KEY_NAME_PREFIX_APP
  329. # # appType: [18, 19]
  330. # elif app_type in [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]:
  331. # key_name_prefix = f'{config_.RECALL_KEY_NAME_PREFIX_APP_TYPE}{app_type}.'
  332. # 其他
  333. else:
  334. key_name_prefix = config_.RECALL_KEY_NAME_PREFIX
  335. # 判断热度列表是否更新,未更新则使用前一天的热度列表
  336. key_name = key_name_prefix + time.strftime('%Y%m%d')
  337. if redis_helper.key_exists(key_name):
  338. redis_date = date.today().strftime('%Y%m%d')
  339. else:
  340. redis_date = (date.today() - timedelta(days=1)).strftime('%Y%m%d')
  341. key_name = key_name_prefix + redis_date
  342. return key_name, redis_date
  343. elif pool_type == 'flow':
  344. # 流量池
  345. return config_.FLOWPOOL_KEY_NAME_PREFIX + str(app_type)
  346. else:
  347. log_.error('pool type error')
  348. return None, None
  349. def filter_app_pool():
  350. """过滤票圈视频APP小时级数据"""
  351. log_.info("app pool filter start ...")
  352. redis_helper = RedisHelper()
  353. # 获取当前日期
  354. now_date = date.today().strftime('%Y%m%d')
  355. # 获取当前所在小时
  356. now_h = datetime.now().hour
  357. log_.info(f'now_date = {now_date}, now_h = {now_h}.')
  358. if now_h < 7:
  359. redis_date = (date.today() - timedelta(days=1)).strftime('%Y%m%d')
  360. redis_h = 21
  361. elif now_h > 21:
  362. redis_date = now_date
  363. redis_h = 21
  364. else:
  365. if now_h % 2 == 0:
  366. redis_date = now_date
  367. redis_h = now_h - 1
  368. else:
  369. redis_date = now_date
  370. redis_h = now_h
  371. log_.info(f'redis_date = {redis_date}, redis_h = {redis_h}.')
  372. # 拼接key
  373. key_name = f'{config_.APP_FINAL_RECALL_KEY_NAME_PREFIX}{redis_date}.{redis_h}'
  374. # 获取视频
  375. data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
  376. if data is None:
  377. log_.info("data is None")
  378. log_.info("app pool filter end!")
  379. return
  380. # 过滤
  381. video_ids = [int(video_id) for video_id in data]
  382. filtered_result = filter_video_status_app(video_ids=video_ids)
  383. # 求差集,获取需要过滤掉的视频,并从redis中移除
  384. filter_videos = set(video_ids) - set(filtered_result)
  385. log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
  386. len(filtered_result),
  387. len(filter_videos)))
  388. log_.info({'key_name': key_name, 'filter_videos': filter_videos})
  389. if len(filter_videos) == 0:
  390. log_.info("app pool filter end!")
  391. return
  392. redis_helper.remove_value_from_zset(key_name=key_name, value=list(filter_videos))
  393. log_.info("app pool filter end!")
  394. def filter_rov_h():
  395. """过滤小程序小时级数据"""
  396. rule_params = config_.RULE_PARAMS
  397. log_.info("rov_h pool filter start ...")
  398. redis_helper = RedisHelper()
  399. # 获取当前日期
  400. now_date = date.today().strftime('%Y%m%d')
  401. # 获取当前所在小时
  402. now_h = datetime.now().hour
  403. log_.info(f'now_date = {now_date}, now_h = {now_h}.')
  404. for key, value in rule_params.items():
  405. log_.info(f"rule = {key}, param = {value}")
  406. # 需过滤两个视频列表
  407. key_prefix_list = [
  408. config_.RECALL_KEY_NAME_PREFIX_BY_H,
  409. config_.RECALL_KEY_NAME_PREFIX_DUP_24H_H,
  410. config_.RECALL_KEY_NAME_PREFIX_DUP_H
  411. ]
  412. for i, key_prefix in enumerate(key_prefix_list):
  413. # 拼接key
  414. key_name = f"{key_prefix}{key}.{now_date}.{now_h}"
  415. log_.info(f"key_name: {key_name}")
  416. # 获取视频
  417. data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
  418. if data is None:
  419. log_.info("data is None")
  420. log_.info("filter end!")
  421. continue
  422. # 过滤
  423. video_ids = [int(video_id) for video_id in data]
  424. filtered_result = filter_video_status(video_ids=video_ids)
  425. # 求差集,获取需要过滤掉的视频,并从redis中移除
  426. filter_videos = set(video_ids) - set(filtered_result)
  427. log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
  428. len(filtered_result),
  429. len(filter_videos)))
  430. log_.info({'key_name': key_name, 'filter_videos': filter_videos})
  431. if len(filter_videos) == 0:
  432. log_.info("filter end!")
  433. continue
  434. redis_helper.remove_value_from_zset(key_name=key_name, value=list(filter_videos))
  435. if i == 0:
  436. # 将小时级的数据需要过滤的视频加入到线上过滤应用列表中
  437. redis_helper.add_data_with_set(key_name=f"{config_.H_VIDEO_FILER}{key}",
  438. values=filter_videos, expire_time=2*3600)
  439. log_.info("rov_h pool filter end!")
  440. def filter_rov_day():
  441. """过滤小程序天级数据"""
  442. rule_params = config_.RULE_PARAMS_DAY
  443. log_.info("rov_day pool filter start ...")
  444. redis_helper = RedisHelper()
  445. # 获取当前日期
  446. now_date = date.today().strftime('%Y%m%d')
  447. log_.info(f'now_date = {now_date}.')
  448. for key, value in rule_params.items():
  449. log_.info(f"rule = {key}, param = {value}")
  450. # 需过滤三个视频列表
  451. key_prefix_list = [
  452. config_.RECALL_KEY_NAME_PREFIX_BY_DAY,
  453. config_.RECALL_KEY_NAME_PREFIX_DUP_DAY_PRE,
  454. config_.RECALL_KEY_NAME_PREFIX_DUP_DAY_NOW
  455. ]
  456. for i, key_prefix in enumerate(key_prefix_list):
  457. # 拼接key
  458. key_name = f"{key_prefix}{key}.{now_date}"
  459. log_.info(f"key_name: {key_name}")
  460. # 获取视频
  461. data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
  462. if data is None:
  463. log_.info("data is None")
  464. log_.info("filter end!")
  465. continue
  466. # 过滤
  467. video_ids = [int(video_id) for video_id in data]
  468. filtered_result = filter_video_status(video_ids=video_ids)
  469. # 求差集,获取需要过滤掉的视频,并从redis中移除
  470. filter_videos = set(video_ids) - set(filtered_result)
  471. log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
  472. len(filtered_result),
  473. len(filter_videos)))
  474. log_.info({'key_name': key_name, 'filter_videos': filter_videos})
  475. if len(filter_videos) == 0:
  476. log_.info("filter end!")
  477. continue
  478. redis_helper.remove_value_from_zset(key_name=key_name, value=list(filter_videos))
  479. log_.info("rov_day pool filter end!")
  480. def filter_old_videos():
  481. """过滤老视频数据"""
  482. log_.info("old videos filter start ...")
  483. redis_helper = RedisHelper()
  484. # 获取当前日期
  485. now_date = date.today().strftime('%Y%m%d')
  486. log_.info(f'now_date = {now_date}.')
  487. # 拼接key
  488. key_name = f'{config_.RECALL_KEY_NAME_PREFIX_OLD_VIDEOS}{now_date}'
  489. # 获取视频
  490. data = redis_helper.get_data_from_set(key_name=key_name)
  491. if data is None:
  492. log_.info("data is None")
  493. log_.info("old videos filter end!")
  494. return
  495. # 过滤
  496. video_ids = [int(video_id) for video_id in data]
  497. filtered_result = filter_video_status(video_ids=video_ids)
  498. # 求差集,获取需要过滤掉的视频,并从redis中移除
  499. filter_videos = set(video_ids) - set(filtered_result)
  500. log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
  501. len(filtered_result),
  502. len(filter_videos)))
  503. if len(filter_videos) == 0:
  504. log_.info("old videos filter end!")
  505. return
  506. redis_helper.remove_value_from_set(key_name=key_name, values=filter_videos)
  507. log_.info("old videos filter end!")
  508. def filter_region_videos():
  509. """过滤地域分组规则视频"""
  510. region_code_list = [code for region, code in region_code.items()]
  511. rule_params = config_.RULE_PARAMS_REGION
  512. log_.info("region_h videos filter start ...")
  513. redis_helper = RedisHelper()
  514. # 获取当前日期
  515. now_date = date.today().strftime('%Y%m%d')
  516. # 获取当前所在小时
  517. now_h = datetime.now().hour
  518. log_.info(f'now_date = {now_date}, now_h = {now_h}.')
  519. for region in region_code_list:
  520. log_.info(f"region = {region}")
  521. for key, value in rule_params.items():
  522. log_.info(f"rule = {key}, param = {value}")
  523. # 需过滤视频列表
  524. key_prefix_list = [
  525. config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H,
  526. config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H,
  527. # config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_DAY_H,
  528. config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_DAY_H,
  529. config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H
  530. ]
  531. for i, key_prefix in enumerate(key_prefix_list):
  532. # 拼接key
  533. key_name = f"{key_prefix}{region}.{key}.{now_date}.{now_h}"
  534. log_.info(f"key_name: {key_name}")
  535. # 获取视频
  536. data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
  537. if data is None:
  538. log_.info("data is None")
  539. log_.info("filter end!")
  540. continue
  541. # 过滤
  542. video_ids = [int(video_id) for video_id in data]
  543. filtered_result = filter_video_status(video_ids=video_ids)
  544. # 求差集,获取需要过滤掉的视频,并从redis中移除
  545. filter_videos = set(video_ids) - set(filtered_result)
  546. log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
  547. len(filtered_result),
  548. len(filter_videos)))
  549. log_.info({'key_name': key_name, 'filter_videos': filter_videos})
  550. if len(filter_videos) == 0:
  551. log_.info("filter end!")
  552. continue
  553. redis_helper.remove_value_from_zset(key_name=key_name, value=list(filter_videos))
  554. if i == 0:
  555. # 将小时级的数据需要过滤的视频加入到线上过滤应用列表中
  556. redis_helper.add_data_with_set(key_name=f"{config_.REGION_H_VIDEO_FILER}{region}.{key}",
  557. values=filter_videos, expire_time=2 * 3600)
  558. elif i == 1:
  559. # 将小时级的数据需要过滤的视频加入到线上过滤应用列表中
  560. redis_helper.add_data_with_set(key_name=f"{config_.REGION_H_VIDEO_FILER_24H}{region}.{key}",
  561. values=filter_videos, expire_time=2 * 3600)
  562. log_.info(f"region = {region} videos filter end!")
  563. log_.info("region_h videos filter end!")
  564. def filter_region_videos_by_day():
  565. """过滤地域分组天级规则视频"""
  566. region_code_list = [code for region, code in region_code.items()]
  567. rule_params = config_.RULE_PARAMS_REGION_DAY
  568. log_.info("region_day videos filter start ...")
  569. redis_helper = RedisHelper()
  570. # 获取当前日期
  571. now_date = date.today().strftime('%Y%m%d')
  572. log_.info(f'now_date = {now_date}.')
  573. for region in region_code_list:
  574. log_.info(f"region = {region}")
  575. for key, value in rule_params.items():
  576. log_.info(f"rule = {key}, param = {value}")
  577. # 需过滤视频列表
  578. key_prefix_list = [
  579. config_.RECALL_KEY_NAME_PREFIX_REGION_BY_DAY
  580. ]
  581. for i, key_prefix in enumerate(key_prefix_list):
  582. # 拼接key
  583. key_name = f"{key_prefix}{region}.{key}.{now_date}"
  584. log_.info(f"key_name: {key_name}")
  585. # 获取视频
  586. data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
  587. if data is None:
  588. log_.info("data is None")
  589. log_.info("filter end!")
  590. continue
  591. # 过滤
  592. video_ids = [int(video_id) for video_id in data]
  593. filtered_result = filter_video_status(video_ids=video_ids)
  594. # 求差集,获取需要过滤掉的视频,并从redis中移除
  595. filter_videos = set(video_ids) - set(filtered_result)
  596. log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
  597. len(filtered_result),
  598. len(filter_videos)))
  599. log_.info({'key_name': key_name, 'filter_videos': filter_videos})
  600. if len(filter_videos) == 0:
  601. log_.info("filter end!")
  602. continue
  603. redis_helper.remove_value_from_zset(key_name=key_name, value=list(filter_videos))
  604. log_.info(f"region = {region} videos filter end!")
  605. log_.info("region_day videos filter end!")
  606. def filter_rov_h_24h():
  607. """过滤小程序小时级更新24h数据"""
  608. rule_params = config_.RULE_PARAMS_24H
  609. log_.info("rov_h_by24h pool filter start ...")
  610. redis_helper = RedisHelper()
  611. # 获取当前日期
  612. now_date = date.today().strftime('%Y%m%d')
  613. # 获取当前所在小时
  614. now_h = datetime.now().hour
  615. log_.info(f'now_date = {now_date}, now_h = {now_h}.')
  616. for key, value in rule_params.items():
  617. log_.info(f"rule = {key}, param = {value}")
  618. # 需过滤两个视频列表
  619. key_prefix_list = [config_.RECALL_KEY_NAME_PREFIX_BY_24H, config_.RECALL_KEY_NAME_PREFIX_DUP_24H]
  620. for i, key_prefix in enumerate(key_prefix_list):
  621. # 拼接key
  622. key_name = f"{key_prefix}{key}.{now_date}.{now_h}"
  623. log_.info(f"key_name: {key_name}")
  624. # 获取视频
  625. data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
  626. if data is None:
  627. log_.info("data is None")
  628. log_.info("filter end!")
  629. continue
  630. # 过滤
  631. video_ids = [int(video_id) for video_id in data]
  632. filtered_result = filter_video_status(video_ids=video_ids)
  633. # 求差集,获取需要过滤掉的视频,并从redis中移除
  634. filter_videos = set(video_ids) - set(filtered_result)
  635. log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
  636. len(filtered_result),
  637. len(filter_videos)))
  638. log_.info({'key_name': key_name, 'filter_videos': filter_videos})
  639. if len(filter_videos) == 0:
  640. log_.info("filter end!")
  641. continue
  642. redis_helper.remove_value_from_zset(key_name=key_name, value=list(filter_videos))
  643. if i == 0:
  644. # 将小时级的数据需要过滤的视频加入到线上过滤应用列表中
  645. redis_helper.add_data_with_set(key_name=f"{config_.H_VIDEO_FILER_24H}{key}",
  646. values=filter_videos, expire_time=2*3600)
  647. log_.info("rov_h_by24h pool filter end!")
  648. def filter_region_videos_24h():
  649. """过滤地域分组24h规则视频"""
  650. region_code_list = [code for region, code in region_code.items()]
  651. rule_params = config_.RULE_PARAMS_REGION_24H
  652. log_.info("region_24h videos filter start ...")
  653. redis_helper = RedisHelper()
  654. # 获取当前日期
  655. now_date = date.today().strftime('%Y%m%d')
  656. # 获取当前所在小时
  657. now_h = datetime.now().hour
  658. log_.info(f'now_date = {now_date}, now_h = {now_h}.')
  659. for region in region_code_list:
  660. log_.info(f"region = {region}")
  661. for key, value in rule_params.items():
  662. log_.info(f"rule = {key}, param = {value}")
  663. # 需过滤视频列表
  664. key_prefix_list = [
  665. config_.RECALL_KEY_NAME_PREFIX_REGION_BY_24H,
  666. config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_DAY_24H,
  667. config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_24H
  668. ]
  669. for i, key_prefix in enumerate(key_prefix_list):
  670. # 拼接key
  671. key_name = f"{key_prefix}{region}.{key}.{now_date}.{now_h}"
  672. log_.info(f"key_name: {key_name}")
  673. # 获取视频
  674. data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1)
  675. if data is None:
  676. log_.info("data is None")
  677. log_.info("filter end!")
  678. continue
  679. # 过滤
  680. video_ids = [int(video_id) for video_id in data]
  681. filtered_result = filter_video_status(video_ids=video_ids)
  682. # 求差集,获取需要过滤掉的视频,并从redis中移除
  683. filter_videos = set(video_ids) - set(filtered_result)
  684. log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),
  685. len(filtered_result),
  686. len(filter_videos)))
  687. log_.info({'key_name': key_name, 'filter_videos': filter_videos})
  688. if len(filter_videos) == 0:
  689. log_.info("filter end!")
  690. continue
  691. redis_helper.remove_value_from_zset(key_name=key_name, value=list(filter_videos))
  692. if i == 0:
  693. # 将小时级的数据需要过滤的视频加入到线上过滤应用列表中
  694. redis_helper.add_data_with_set(key_name=f"{config_.REGION_H_VIDEO_FILER_24H}{region}.{key}",
  695. values=filter_videos, expire_time=2 * 3600)
  696. log_.info(f"region = {region} videos filter end!")
  697. log_.info("region_24h videos filter end!")
  698. def main():
  699. try:
  700. # ROV召回池视频过滤
  701. filter_rov_pool()
  702. # appType = 6,ROV召回池视频过滤
  703. # filter_rov_pool(app_type=config_.APP_TYPE['SHORT_VIDEO'])
  704. # appType = 13,票圈视频APP视频过滤
  705. filter_rov_pool(app_type=config_.APP_TYPE['APP'])
  706. # appType = 18, ROV召回池视频过滤
  707. filter_rov_pool(app_type=config_.APP_TYPE['LAO_HAO_KAN_VIDEO'])
  708. # appType = 19, ROV召回池视频过滤
  709. filter_rov_pool(app_type=config_.APP_TYPE['ZUI_JING_QI'])
  710. # 流量池视频过滤
  711. filter_flow_pool()
  712. # 兜底视频过滤
  713. filter_bottom()
  714. # 修改过ROV的视频过滤
  715. filter_rov_updated()
  716. filter_rov_updated_app()
  717. # 运营强插相关推荐视频过滤
  718. # filter_relevant_videos()
  719. # 按位置排序视频过滤
  720. # filter_position_videos()
  721. # 过滤票圈视频APP小时级数据
  722. filter_app_pool()
  723. # 过滤小程序小时级数据
  724. filter_rov_h()
  725. # 过滤小程序天级数据
  726. filter_rov_day()
  727. # 过滤老视频数据
  728. # filter_old_videos()
  729. # 过滤地域分组小时级视频
  730. filter_region_videos()
  731. # 过滤地域分组天级视频
  732. filter_region_videos_by_day()
  733. # 过滤小时级更新24h视频
  734. filter_rov_h_24h()
  735. # 过滤地域分组24h规则视频
  736. filter_region_videos_24h()
  737. except Exception as e:
  738. log_.error(traceback.format_exc())
  739. send_msg_to_feishu(
  740. webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
  741. key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
  742. msg_text='{} - 过滤失败 \n {}'.format(config_.ENV_TEXT, traceback.format_exc())
  743. )
  744. return
  745. if __name__ == '__main__':
  746. main()