|
@@ -3,6 +3,9 @@ import traceback
|
|
|
import requests
|
|
|
import json
|
|
|
import time
|
|
|
+import gevent
|
|
|
+import pandas as pd
|
|
|
+import random
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
@@ -370,6 +373,7 @@ class FilterVideos(object):
|
|
|
:param video_ids: 需过滤的视频列表 type-list
|
|
|
:return: filtered_videos 过滤后的列表 type-list
|
|
|
"""
|
|
|
+ pre_time = time.time()
|
|
|
if not self.mid or self.mid == 'null':
|
|
|
|
|
|
return video_ids
|
|
@@ -386,6 +390,7 @@ class FilterVideos(object):
|
|
|
return video_ids
|
|
|
pe_videos = [int(video) for video in pe_videos_list]
|
|
|
filtered_videos = [video_id for video_id in video_ids if video_id not in pe_videos]
|
|
|
+ print(f"pre res: {filtered_videos}\nexecute_time: {(time.time() - pre_time) * 1000}")
|
|
|
return filtered_videos
|
|
|
|
|
|
|
|
@@ -453,6 +458,43 @@ class FilterVideos(object):
|
|
|
filtered_videos = result['data']
|
|
|
return filtered_videos
|
|
|
|
|
|
def filter_video_viewed_new(self, video_ids):
    """
    Call the backend filter service to drop videos the user has already watched.

    The filter types sent to the service are read from
    ``config_.FILTER_VIEWED_TYPES_CONFIG`` using ``self.app_type`` as the key,
    falling back to the ``'other'`` entry. Type 6 is always removed from the
    request (presumably because whitelist filtering is done separately by
    ``filter_movie_religion_video`` - TODO confirm).

    Filter type codes, as listed in the original docstring:
        1 - already viewed, 2 - video status, 3 - entered the seniors' community,
        4 - topic status, 5 - recommend status, 6 - whitelist filter,
        7 - political video filter

    :param video_ids: list of video ids to filter
    :return: the ``data`` field of the service response; an empty list when
             ``request_post`` returns None, the response code is not 0, or
             the response carries no data
    """
    st_time = time.time()
    types = config_.FILTER_VIEWED_TYPES_CONFIG.get(self.app_type, None)
    print(types)
    if types is None:
        types = config_.FILTER_VIEWED_TYPES_CONFIG.get('other')
    if types is None:
        # Nothing configured at all: use the documented default (viewed filter
        # only) instead of failing on `6 in None`.
        types = (1,)
    if 6 in types:
        # Drop every occurrence of type 6, not just the first one.
        types = [filter_type for filter_type in types if filter_type != 6]
    print(types)
    request_data = {
        "appType": self.app_type,
        "mid": self.mid,
        "uid": self.uid,
        "types": list(types),
        "videoIds": video_ids,
    }

    result = request_post(request_url=config_.VIDEO_FILTER_URL, request_data=request_data, timeout=(0.1, 1))
    print(f"view res: {result}\nexecute_time: {(time.time() - st_time) * 1000}")
    if result is None or result.get('code') != 0:
        return []

    # A successful response with a missing/null payload is treated as "no videos"
    # so callers can always iterate the return value.
    return result.get('data') or []
|
|
|
+
|
|
|
def filter_shield_video(self, video_ids, shield_key_name_list):
|
|
|
"""
|
|
|
过滤屏蔽视频视频
|
|
@@ -476,10 +518,111 @@ class FilterVideos(object):
|
|
|
|
|
|
return video_ids
|
|
|
|
|
|
def filter_movie_religion_video(self, video_ids):
    """
    Filter out whitelist-controlled videos (movie / religion).

    Which Redis key family is consulted depends on ``self.app_type``:
      * WAN_NENG_VIDEO, ZUI_JING_QI, H5 -> ``rov.filter.religion.<video_id>``
      * LAO_HAO_KAN_VIDEO                -> no filtering, input returned as is
      * every other app type             -> ``rov.filter.movie.<video_id>``

    Keys are fetched with MGET in batches of 1000. Every value found is
    converted with ``int`` and treated as a video id to remove.

    :param video_ids: list of candidate video ids
    :return: ``video_ids`` itself when nothing has to be removed; otherwise a
             new de-duplicated list, in input order, without the flagged ids
    """
    st_time = time.time()
    app_types = config_.APP_TYPE
    religion_apps = [app_types['WAN_NENG_VIDEO'], app_types['ZUI_JING_QI'], app_types['H5']]
    if self.app_type in religion_apps:
        key_prefix = "rov.filter.religion"
    elif self.app_type == app_types['LAO_HAO_KAN_VIDEO']:
        print(f"m_r res: {video_ids}\nexecute_time: {(time.time() - st_time) * 1000}")
        return video_ids
    else:
        key_prefix = "rov.filter.movie"
    keys = [f"{key_prefix}.{video_id}" for video_id in video_ids]

    redis_helper = RedisHelper(redis_info=config_.REDIS_INFO_FILTER)
    batch_size = 1000
    blocked_ids = set()
    for start in range(0, len(keys), batch_size):
        mget_res = redis_helper.mget(keys=keys[start:start + batch_size])
        # Missing keys may come back as None (redis-py) or as the literal
        # string 'nil' (what the original code checked for); skip both.
        # A None reply from the helper is treated as "nothing found".
        blocked_ids.update(
            int(data) for data in (mget_res or ()) if data is not None and data != 'nil'
        )

    if blocked_ids:
        # dict.fromkeys de-duplicates like the original set difference did,
        # but keeps the caller's ordering.
        filtered_videos = [
            video_id for video_id in dict.fromkeys(video_ids) if video_id not in blocked_ids
        ]
        print(f"m_r res: {filtered_videos}\nexecute_time: {(time.time() - st_time) * 1000}")
        return filtered_videos

    print(f"m_r res: {video_ids}\nexecute_time: {(time.time() - st_time) * 1000}")
    return video_ids
|
|
|
+
|
|
|
def filter_videos_new(self, pool_type='rov', region_code=None, shield_config=None):
    """
    Run the full video filter pipeline on ``self.video_ids``.

    Steps:
      1. ``filter_video_previewed`` is applied to ``self.video_ids``.
      2. ``filter_video_viewed_new`` and ``filter_movie_religion_video`` are
         run concurrently as gevent greenlets on the result of step 1; only
         ids returned by both are kept.
      3. When ``pool_type`` is ``'flow'`` and both ``region_code`` and
         ``shield_config`` are given, ``filter_shield_video`` is applied with
         the key names configured for the region, and the outcome is logged.

    :param pool_type: pool the videos come from; only ``'flow'`` triggers the shield filter
    :param region_code: key used to look up shield key names in ``shield_config``
    :param shield_config: mapping of region code -> shield key name list
    :return: list of int video ids that passed every filter, or None when
             step 1 or step 2 leaves nothing
    """
    filtered_pre_result = self.filter_video_previewed(self.video_ids)
    if not filtered_pre_result:
        return None

    st_viewed = time.time()
    tasks = [
        gevent.spawn(self.filter_video_viewed_new, filtered_pre_result),
        gevent.spawn(self.filter_movie_religion_video, filtered_pre_result),
    ]
    gevent.joinall(tasks)
    # Greenlet.get() re-raises any exception raised inside the greenlet.
    viewed_result, whitelist_result = [task.get() for task in tasks]

    # Normalise ids to int BEFORE intersecting: if the backend returned ids as
    # strings while the local filter returned ints, intersecting the raw values
    # would silently produce an empty result.
    viewed_ids = {int(video_id) for video_id in (viewed_result or ())}
    allowed_ids = {int(video_id) for video_id in (whitelist_result or ())}
    keep_ids = viewed_ids & allowed_ids
    # Rebuild the list in candidate order (de-duplicated) rather than in
    # arbitrary set order.
    filtered_viewed_videos = [
        video_id
        for video_id in dict.fromkeys(int(candidate) for candidate in filtered_pre_result)
        if video_id in keep_ids
    ]
    print(f"view&m_r res: {filtered_viewed_videos}\nexecute_time: {(time.time() - st_viewed) * 1000}")
    if not filtered_viewed_videos:
        return None

    # The shield filter only applies to the flow pool with a usable region config.
    if pool_type != 'flow' or region_code is None or shield_config is None:
        return filtered_viewed_videos

    shield_key_name_list = shield_config.get(region_code, None)
    if shield_key_name_list is None:
        return filtered_viewed_videos

    filtered_shield_video_ids = self.filter_shield_video(
        video_ids=filtered_viewed_videos, shield_key_name_list=shield_key_name_list
    )
    log_.info({
        'logTimestamp': int(time.time() * 1000),
        'pool_type': pool_type,
        'request_id': self.request_id,
        'app_type': self.app_type,
        'mid': self.mid,
        'uid': self.uid,
        'operation': 'shield_filter',
        'request_videos': filtered_viewed_videos,
        'shield_filter_result': filtered_shield_video_ids,
        # NOTE(review): measured from the start of the viewed/whitelist step,
        # so it includes more than the shield filter itself - confirm intent.
        'executeTime': (time.time() - st_viewed) * 1000
    })
    return filtered_shield_video_ids
|
|
|
+
|
|
|
|
|
|
if __name__ == '__main__':
    # Ad-hoc timing harness: for each (mid, uid) pair, draw a random sample of
    # video ids from a local CSV and time FilterVideos.filter_videos_new().
    user = [
        ('weixin_openid_o0w175fDc8pNnywrYN49E341tKfI', ''),
        ('weixin_openid_o0w175YwC3hStzcR5DAQdbgzdMeI', ''),
        ('weixin_openid_o0w175ftZDl6VJVDx9la3WVPh7mU', '15900461'),
        ('weixin_openid_o0w175SPqpCVRcp7x1XvnX4qpIvI', '19659040'),
        ('weixin_openid_o0w175cOnguapyWIrDrHkOWl4oFQ', '31210128'),
        ('weixin_openid_o0w175UXYId-o71e1Q3SOheYNteQ', '33099722'),
        ('weixin_openid_o0w175QQ5b42AtOe50bchrFgcttA', ''),
        ('weixin_openid_o0w175bgaPlfLsp3YLDKWqLWtXX8', '35371534'),
        ('weixin_openid_o0w175eRpvbmV6nOhM1VTyyLICWA', '30488803'),
        ('weixin_openid_o0w175TZYvG47pQkOjyJFoxQuqsw', ''),
    ]
    # NOTE(review): developer-machine absolute path; this only runs where the file exists.
    video_df = pd.read_csv('/Users/liqian/Documents/liqian/work/PythonProject/test/data/videoids.csv')
    videoid_list = video_df['videoid'].tolist()
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample at the number of ids actually loaded.
    sample_size = min(1000, len(videoid_list))
    for mid, uid in user:
        video_ids = random.sample(videoid_list, sample_size)
        start_time = time.time()
        filter_ = FilterVideos(request_id=f'{mid} - {uid}', app_type=0, mid=mid, uid=uid, video_ids=video_ids)
        res = filter_.filter_videos_new()
        print(f"res: {res}\nexecute_time: {(time.time() - start_time) * 1000}")
|
|
|
|
|
|
|
|
|
|
|
@@ -491,9 +634,11 @@ if __name__ == '__main__':
|
|
|
|
|
|
|
|
|
|
|
|
- mid = "weixin_openid_obHDW5c4g3aULfCWh-68LcUSxCB"
|
|
|
+
|
|
|
|
|
|
|
|
|
- res = get_user_has30day_return(mid=mid)
|
|
|
- print(res, type(res))
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
|