alg_growth_common.py 3.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. #! /usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # vim:fenc=utf-8
  4. #
  5. # Copyright © 2024 StrayWarrior <i@straywarrior.com>
  6. """
  7. Common functions for growth jobs
  8. """
  9. import pandas as pd
  10. from my_utils import request_post
  11. UNSAFE_VIDEO_IDS = [14403867, 13696461, 13671819, 13587868, 13680796, 14050873,
  12. 26348326, 28623786]
  13. AUDIT_APPROVED_GH_IDS = ['gh_b63b9dde3f4b','gh_330ef0db846d','gh_330ef0db846d','gh_e2318164f869','gh_620af8e24fb9',
  14. 'gh_620af8e24fb9','gh_133c36b99b14','gh_133c36b99b14','gh_5ac72e2b9130','gh_ef8ade0fad92',
  15. 'gh_1e03b6de22bf','gh_5538fe297e59','gh_8c6fffcbaac1','gh_8c6fffcbaac1','gh_d0e830b7547e',
  16. 'gh_fb234f4e32a5','gh_84c5d01a61e7','gh_87c4b8ae885e','gh_29d8a63d5e5e','gh_b144210318e5',
  17. 'gh_b144210318e5','gh_1f9bf4cfa788','gh_4f47d12bbe04','gh_8c6af276df98','gh_1f16bc6ac60d',
  18. 'gh_4920bc4c5720','gh_5177a8c57917','gh_5177a8c57917','gh_5e3e6cd5e35c','gh_5e3e6cd5e35c',
  19. 'gh_d2c72bcc05c9','gh_d2c72bcc05c9','gh_5f2400da935c','gh_5f2400da935c','gh_669555ebea28',
  20. 'gh_28ce883486c3','gh_28ce883486c3','gh_7057ef30222b','gh_7057ef30222b','gh_b0048adc0b46',
  21. 'gh_6e61a2d5db85','gh_01cd19465b39','gh_01cd19465b39','gh_126c99b39cea','gh_4a1174e36ceb',
  22. 'gh_f81c27eb8c48','gh_f81c27eb8c48','gh_3170dc15e246','gh_1ccfb5620605','gh_315be76a746d',
  23. 'gh_4f47d12bbe04','gh_4f47d12bbe04','gh_4f47d12bbe04']
  24. def check_unsafe_video(df, force_replace=True):
  25. unsafe_video_condition = ','.join([str(x) for x in UNSAFE_VIDEO_IDS])
  26. unsafe_rows = df.query(f'video_id in ({unsafe_video_condition})')
  27. if len(unsafe_rows) > 0:
  28. print(unsafe_rows)
  29. if not force_replace:
  30. raise Exception("video unsafe")
  31. df.loc[unsafe_rows.index, 'video_id'] = 20463342
  32. def filter_unsafe_video(df):
  33. unsafe_video_condition = ','.join([str(x) for x in UNSAFE_VIDEO_IDS])
  34. df = df.query(f'video_id not in ({unsafe_video_condition})')
  35. return df
  36. def filter_audit_failed_video(df):
  37. video_id_list = df['video_id'].tolist()
  38. chunk_size = 20
  39. result = [video_id_list[i:i + chunk_size] for i in range(0, len(video_id_list), chunk_size)]
  40. video_audit_failure = []
  41. for chunk in result:
  42. json_structure = {
  43. "videoIdList": chunk
  44. }
  45. result = request_post('https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo',
  46. json_structure)
  47. if result is None:
  48. continue
  49. if result['code'] != 0:
  50. continue
  51. for item in result['data']:
  52. if item['auditStatus'] != 5 or item['appAuditStatus'] != 5:
  53. video_audit_failure.append(item['id'])
  54. condition = (df['video_id'].isin(video_audit_failure)) & (df['gh_id'].isin(AUDIT_APPROVED_GH_IDS))
  55. filtered_df = df[~condition]
  56. return filtered_df
  57. if __name__ == '__main__':
  58. # 定义包含 ID 的字符串
  59. id_str = '20463342,12794884,13788955,13586800,4780859,33332362,19175397,4555247,14403867,12117356,14050873,14142458,17638023,14945734,13680796,13042177,10587660,14552795,12418493,12700411,13671819,13825547,12166346,13587868,19096953,14095344,13817005,1275943,13437896,12492103'
  60. # 将字符串按逗号分割成列表
  61. id_list = id_str.split(',')
  62. # 将列表中的元素转换为整数(如果需要)
  63. id_list = [int(id) for id in id_list]
  64. # 创建 DataFrame 并将 ID 列表放入 'video_id' 列
  65. df = pd.DataFrame({'video_id': id_list})
  66. video_audit_failure = filter_audit_failed_video(df)
  67. print(df)
  68. print(video_audit_failure)
  69. filtered_df = df[~df['video_id'].isin(video_audit_failure)]
  70. print(filtered_df)