#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2024 StrayWarrior

"""
Common functions for growth jobs
"""

import pandas as pd

from my_utils import request_post

# Video ids that must never be delivered; see check_unsafe_video() and
# filter_unsafe_video().
UNSAFE_VIDEO_IDS = [14403867, 13696461, 13671819, 13587868,
                    13680796, 14050873, 26348326, 28623786]

# Value written over an unsafe video id by check_unsafe_video().
# NOTE(review): presumably a known-safe placeholder video -- confirm.
SAFE_REPLACEMENT_VIDEO_ID = 20463342

# gh_ids whose rows are subject to the audit filter in
# filter_audit_failed_video().  The list contains repeated entries; they are
# harmless for the membership test and are kept verbatim.
AUDIT_APPROVED_GH_IDS = [
    'gh_b63b9dde3f4b', 'gh_330ef0db846d', 'gh_330ef0db846d', 'gh_e2318164f869', 'gh_620af8e24fb9',
    'gh_620af8e24fb9', 'gh_133c36b99b14', 'gh_133c36b99b14', 'gh_5ac72e2b9130', 'gh_ef8ade0fad92',
    'gh_1e03b6de22bf', 'gh_5538fe297e59', 'gh_8c6fffcbaac1', 'gh_8c6fffcbaac1', 'gh_d0e830b7547e',
    'gh_fb234f4e32a5', 'gh_84c5d01a61e7', 'gh_87c4b8ae885e', 'gh_29d8a63d5e5e', 'gh_b144210318e5',
    'gh_b144210318e5', 'gh_1f9bf4cfa788', 'gh_4f47d12bbe04', 'gh_8c6af276df98', 'gh_1f16bc6ac60d',
    'gh_4920bc4c5720', 'gh_5177a8c57917', 'gh_5177a8c57917', 'gh_5e3e6cd5e35c', 'gh_5e3e6cd5e35c',
    'gh_d2c72bcc05c9', 'gh_d2c72bcc05c9', 'gh_5f2400da935c', 'gh_5f2400da935c', 'gh_669555ebea28',
    'gh_28ce883486c3', 'gh_28ce883486c3', 'gh_7057ef30222b', 'gh_7057ef30222b', 'gh_b0048adc0b46',
    'gh_6e61a2d5db85', 'gh_01cd19465b39', 'gh_01cd19465b39', 'gh_126c99b39cea', 'gh_4a1174e36ceb',
    'gh_f81c27eb8c48', 'gh_f81c27eb8c48', 'gh_3170dc15e246', 'gh_1ccfb5620605', 'gh_315be76a746d',
    'gh_4f47d12bbe04', 'gh_4f47d12bbe04', 'gh_4f47d12bbe04',
]

# Endpoint queried for per-video audit information.
_VIDEO_INFO_URL = ('https://longvideoapi.piaoquantv.com/longvideoapi/openapi'
                   '/video/batchSelectVideoInfo')
# Number of video ids sent per request.
_VIDEO_INFO_CHUNK_SIZE = 20
# A video counts as audit-failed unless both of its status fields equal this.
_AUDIT_PASSED_STATUS = 5


def check_unsafe_video(df, force_replace=True,
                       replacement_video_id=SAFE_REPLACEMENT_VIDEO_ID):
    """Detect rows whose ``video_id`` is in ``UNSAFE_VIDEO_IDS``.

    Offending rows are printed.  With ``force_replace`` true (the default)
    their ``video_id`` is overwritten in place with ``replacement_video_id``;
    otherwise an exception is raised and ``df`` is left untouched.

    Args:
        df: DataFrame with a ``video_id`` column.  Modified in place.
        force_replace: Replace unsafe ids instead of raising.
        replacement_video_id: Id written over every unsafe ``video_id``.

    Raises:
        Exception: If unsafe rows exist and ``force_replace`` is false.
    """
    unsafe_mask = df['video_id'].isin(UNSAFE_VIDEO_IDS)
    if not unsafe_mask.any():
        return

    print(df[unsafe_mask])
    if not force_replace:
        raise Exception("video unsafe")
    # Select by boolean mask rather than by index label: on a non-unique
    # index, label-based selection would also overwrite safe rows that share
    # a label with an unsafe one.
    df.loc[unsafe_mask, 'video_id'] = replacement_video_id


def filter_unsafe_video(df):
    """Return the rows of ``df`` whose ``video_id`` is not in ``UNSAFE_VIDEO_IDS``.

    Args:
        df: DataFrame with a ``video_id`` column.  Not modified.

    Returns:
        A new DataFrame without the unsafe rows.
    """
    return df[~df['video_id'].isin(UNSAFE_VIDEO_IDS)]


def filter_audit_failed_video(df):
    """Drop audit-failed videos for accounts listed in ``AUDIT_APPROVED_GH_IDS``.

    The video info endpoint is queried in chunks of
    ``_VIDEO_INFO_CHUNK_SIZE`` ids.  A video is treated as failed when its
    ``auditStatus`` or ``appAuditStatus`` differs from
    ``_AUDIT_PASSED_STATUS``.  A row is removed only when its video failed
    AND its ``gh_id`` is in ``AUDIT_APPROVED_GH_IDS``; rows of other accounts
    are always kept.

    Chunks whose request yields ``None`` or a non-zero ``code`` are skipped,
    so the videos in them are kept (best effort).

    Args:
        df: DataFrame with ``video_id`` and ``gh_id`` columns.  Not modified.

    Returns:
        A new DataFrame without the filtered rows.
    """
    # Each distinct id only needs to be looked up once.
    video_ids = df['video_id'].drop_duplicates().tolist()

    failed_video_ids = []
    for start in range(0, len(video_ids), _VIDEO_INFO_CHUNK_SIZE):
        chunk = video_ids[start:start + _VIDEO_INFO_CHUNK_SIZE]
        # NOTE(review): request_post presumably returns the decoded JSON body,
        # or None on failure -- confirm against my_utils.
        response = request_post(_VIDEO_INFO_URL, {"videoIdList": chunk})
        if response is None or response['code'] != 0:
            continue
        # A successful response may still carry a null or missing 'data'.
        for item in response.get('data') or []:
            if (item['auditStatus'] != _AUDIT_PASSED_STATUS
                    or item['appAuditStatus'] != _AUDIT_PASSED_STATUS):
                failed_video_ids.append(item['id'])

    drop_mask = (df['video_id'].isin(failed_video_ids)
                 & df['gh_id'].isin(AUDIT_APPROVED_GH_IDS))
    return df[~drop_mask]