zhangbo 1 год назад
Родитель
Сommit
aac6b6864b
2 изменённых файла: 198 добавлений и 148 удалений
  1. 149 139
      utils.py
  2. 49 9
      video_recall.py

+ 149 - 139
utils.py

@@ -19,6 +19,115 @@ from parameter_update import param_update_risk_filter_flag
 config_ = set_config()
 log_ = Log()
 
+HOLIDAY_KV = {  # maps a "YYYY-MM-DD" date string to a holiday / solar-term name; looked up with today's date in filter_videos_with_tags_rule
+"2023-12-25":"圣诞节",
+"2024-01-01":"元旦",
+"2024-01-18":"腊八节",
+"2024-02-02":"小年",
+"2024-02-03":"小年",
+"2024-02-09":"除夕",
+"2024-02-10":"春节",
+"2024-02-14":"情人节",
+"2024-02-24":"元宵节",
+"2024-03-11":"龙抬头",
+"2024-03-08":"妇女节",
+"2024-05-01":"劳动节",
+"2024-05-12":"母亲节",
+"2024-06-01":"儿童节",
+"2024-06-10":"端午节",
+"2024-06-16":"父亲节",
+"2024-07-01":"建党节",
+"2024-07-07":"七七事变",
+"2024-08-01":"建军节",
+"2024-08-10":"七夕节",
+"2024-08-18":"中元节",
+"2024-09-17":"中秋节",
+"2024-09-09":"毛主席逝世",
+"2024-10-01":"国庆节",
+"2024-10-11":"重阳节",
+"2024-11-28":"感恩节",
+"2024-12-13":"公祭日",
+"2024-12-24":"平安夜",
+"2024-12-25":"圣诞节",
+"2024-12-26":"毛主席诞辰",
+"2024-01-06":"小寒",
+"2024-01-20":"大寒",
+"2024-02-04":"立春",
+"2024-02-19":"雨水",
+"2024-03-05":"惊蛰",
+"2024-03-20":"春分",
+"2024-04-04":"清明",
+"2024-04-19":"谷雨",
+"2024-05-05":"立夏",
+"2024-05-20":"小满",
+"2024-06-05":"芒种",
+"2024-06-21":"夏至",
+"2024-07-06":"小暑",
+"2024-07-22":"大暑",
+"2024-08-07":"立秋",
+"2024-08-22":"处暑",
+"2024-09-07":"白露",
+"2024-09-22":"秋分",
+"2024-10-08":"寒露",
+"2024-10-23":"霜降",
+"2024-11-07":"立冬",
+"2024-11-22":"小雪",
+"2024-12-06":"大雪",
+"2024-12-21":"冬至",
+"2025-01-01":"元旦",
+"2025-01-07":"腊八节",
+"2025-01-22":"小年",
+"2025-01-23":"小年",
+"2025-01-28":"除夕",
+"2025-01-29":"春节",
+"2025-02-14":"情人节",
+"2025-02-22":"元宵节",
+"2025-03-01":"龙抬头",
+"2025-03-08":"妇女节",
+"2025-05-01":"劳动节",
+"2025-05-11":"母亲节",
+"2025-06-01":"儿童节",
+"2025-05-31":"端午节",
+"2025-06-15":"父亲节",
+"2025-07-01":"建党节",
+"2054-07-07":"七七事变",  # NOTE(review): year looks like a typo for 2025, but "2025-07-07" is already the key for 小暑 further down and a duplicate key would be overridden by it — confirm intent
+"2025-08-01":"建军节",
+"2025-08-29":"七夕节",
+"2025-09-06":"中元节",
+"2025-10-06":"中秋节",
+"2025-09-09":"毛主席逝世",
+"2025-10-01":"国庆节",
+"2025-10-29":"重阳节",
+"2025-11-27":"感恩节",
+"2025-12-13":"公祭日",
+"2025-12-24":"平安夜",
+"2025-12-25":"圣诞节",
+"2025-12-26":"毛主席诞辰",
+"2025-01-05":"小寒",
+"2025-01-20":"大寒",
+"2025-02-03":"立春",
+"2025-02-18":"雨水",
+"2025-03-05":"惊蛰",
+"2025-03-20":"春分",
+"2025-04-04":"清明",
+"2025-04-20":"谷雨",
+"2025-05-05":"立夏",
+"2025-05-21":"小满",
+"2025-06-05":"芒种",
+"2025-06-21":"夏至",
+"2025-07-07":"小暑",
+"2025-07-22":"大暑",
+"2025-08-07":"立秋",
+"2025-08-23":"处暑",
+"2025-09-07":"白露",
+"2025-09-23":"秋分",
+"2025-10-08":"寒露",
+"2025-10-23":"霜降",
+"2025-11-07":"立冬",
+"2025-11-22":"小雪",
+"2025-12-07":"大雪",
+"2025-12-21":"冬至",
+}
 
 def send_msg_to_feishu(msg_text):
     """发送消息到飞书"""
@@ -892,7 +1001,7 @@ class FilterVideos(object):
         else:
             return video_ids[:min(self.force_truncation, len(video_ids))]
 
-    def filter_videos_for_group(self, region_code=None, videos=None, video_tag_dict=None, tags_rule=None):
+    def filter_videos_for_group(self, region_code=None, videos=None, video_tag_dict=None, tags_rule=None, tags_filter_flag=None):
         """视频过滤"""
         videos_filtered = self.filter_videos_with_risk_video(videos, self.app_type, region_code)
         filtered_pre_result = self.filter_video_previewed(videos_filtered)
@@ -901,22 +1010,25 @@ class FilterVideos(object):
         filtered_viewed_result = self.filter_video_viewed_status(video_ids=filtered_pre_result)
         if not filtered_viewed_result:
             return None
-        result = self.filter_videos_with_tags_rule([int(video_id) for video_id in filtered_viewed_result],
-                                                   video_tag_dict, tags_rule)
-        result = [int(video_id) for video_id in result]
+
+        if tags_filter_flag:
+            result = self.filter_videos_with_tags_rule([int(video_id) for video_id in filtered_viewed_result],
+                                                       video_tag_dict, tags_rule)
+            result = [int(video_id) for video_id in result]
+        else:
+            result = [int(video_id) for video_id in filtered_viewed_result]
         return result
 
-    def filter_videos_with_tags_rule(self, video_ids: list, video_tag_dict: list, tags_rule: dict):
+    def filter_videos_with_tags_rule(self, video_ids: list, video_tag_dict: dict, tags_rule: dict):
         # 1 获取当日节日信息和小时数字
         hour = datetime.now().hour
         date = datetime.strftime(datetime.today(), '%Y-%m-%d')
-        holiday_cn = HOLIDAY_KV[date] if date in HOLIDAY_KV.items() else ""
-
+        holiday_cn = HOLIDAY_KV[date] if date in HOLIDAY_KV.keys() else ""
         # 2 确认命中规则: 先处理天级别,后处理年级别
         tag_days = ["早上好", "中午好", "下午好", "晚上好", "晚安"]
         filter_tags = []
         for tag_day in tag_days:
-            rules = tags_rule["早上好"] if "早上好" in tags_rule.keys() else {}
+            rules = tags_rule[tag_day] if tag_day in tags_rule.keys() else {}
             start = rules["start"] if "start" in rules.keys() else 0
             end = rules["end"] if "end" in rules.keys() else 23
             if hour < start or hour > end:
@@ -927,150 +1039,48 @@ class FilterVideos(object):
             end = rules["end"] if "end" in rules.keys() else 9
             if hour < start or hour > end:
                 filter_tags.append(holiday_cn)
-        print("zb:" + str(filter_tags))
         if len(filter_tags) == 0:
             return video_ids
         # 3 获取视频的tag 进行过滤
         video_id_result = []
         for _, id in enumerate(video_ids):
             tags = video_tag_dict[id]
-            if_filter_video = not (set(filter_tags) & set(tags))
-            if if_filter_video:
+            if_filter_video = set(filter_tags) & set(tags)
+            if len(if_filter_video) > 0:
                 pass
             else:
                 video_id_result.append(id)
         return video_id_result
 
-HOLIDAY_KV = {
-"2024-01-01":"元旦",
-"2024-01-18":"腊八节",
-"2024-02-02":"小年",
-"2024-02-03":"小年",
-"2024-02-09":"除夕",
-"2024-02-10":"春节",
-"2024-02-14":"情人节",
-"2024-02-24":"元宵节",
-"2024-03-11":"龙抬头",
-"2024-03-08":"妇女节",
-"2024-05-01":"劳动节",
-"2024-05-12":"母亲节",
-"2024-06-01":"儿童节",
-"2024-06-10":"端午节",
-"2024-06-16":"父亲节",
-"2024-07-01":"建党节",
-"2024-07-07":"七七事变",
-"2024-08-01":"建军节",
-"2024-08-10":"七夕节",
-"2024-08-18":"中元节",
-"2024-09-17":"中秋节",
-"2024-09-09":"毛主席逝世",
-"2024-10-01":"国庆节",
-"2024-10-11":"重阳节",
-"2024-11-28":"感恩节",
-"2024-12-13":"公祭日",
-"2024-12-24":"平安夜",
-"2024-12-25":"圣诞节",
-"2024-12-26":"毛主席诞辰",
-"2024-01-06":"小寒",
-"2024-01-20":"大寒",
-"2024-02-04":"立春",
-"2024-02-19":"雨水",
-"2024-03-05":"惊蛰",
-"2024-03-20":"春分",
-"2024-04-04":"清明",
-"2024-04-19":"谷雨",
-"2024-05-05":"立夏",
-"2024-05-20":"小满",
-"2024-06-05":"芒种",
-"2024-06-21":"夏至",
-"2024-07-06":"小暑",
-"2024-07-22":"大暑",
-"2024-08-07":"立秋",
-"2024-08-22":"处暑",
-"2024-09-07":"白露",
-"2024-09-22":"秋分",
-"2024-10-08":"寒露",
-"2024-10-23":"霜降",
-"2024-11-07":"立冬",
-"2024-11-22":"小雪",
-"2024-12-06":"大雪",
-"2024-12-21":"冬至",
-"2025-01-01":"元旦",
-"2025-01-07":"腊八节",
-"2025-01-22":"小年",
-"2025-01-23":"小年",
-"2025-01-28":"除夕",
-"2025-01-29":"春节",
-"2025-02-14":"情人节",
-"2025-02-22":"元宵节",
-"2025-03-01":"龙抬头",
-"2025-03-08":"妇女节",
-"2025-05-01":"劳动节",
-"2025-05-11":"母亲节",
-"2025-06-01":"儿童节",
-"2025-05-31":"端午节",
-"2025-06-15":"父亲节",
-"2025-07-01":"建党节",
-"2054-07-07":"七七事变",
-"2025-08-01":"建军节",
-"2025-08-29":"七夕节",
-"2025-09-06":"中元节",
-"2025-10-06":"中秋节",
-"2025-09-09":"毛主席逝世",
-"2025-10-01":"国庆节",
-"2025-10-29":"重阳节",
-"2024-11-27":"感恩节",
-"2025-12-13":"公祭日",
-"2025-12-24":"平安夜",
-"2025-12-25":"圣诞节",
-"2025-12-26":"毛主席诞辰",
-"2025-01-05":"小寒",
-"2025-01-20":"大寒",
-"2025-02-03":"立春",
-"2025-02-18":"雨水",
-"2025-03-05":"惊蛰",
-"2025-03-20":"春分",
-"2025-04-04":"清明",
-"2025-04-20":"谷雨",
-"2025-05-05":"立夏",
-"2025-05-21":"小满",
-"2025-06-05":"芒种",
-"2025-06-21":"夏至",
-"2025-07-07":"小暑",
-"2025-07-22":"大暑",
-"2025-08-07":"立秋",
-"2025-08-23":"处暑",
-"2025-09-07":"白露",
-"2025-09-23":"秋分",
-"2025-10-08":"寒露",
-"2025-10-23":"霜降",
-"2025-11-07":"立冬",
-"2025-11-22":"小雪",
-"2025-12-07":"大雪",
-"2025-12-21":"冬至",
-}
+
 
 if __name__ == '__main__':
-    user = [
-        ('weixin_openid_o0w175fDc8pNnywrYN49E341tKfI', ''),
-        ('weixin_openid_o0w175YwC3hStzcR5DAQdbgzdMeI', ''),
-        ('weixin_openid_o0w175ftZDl6VJVDx9la3WVPh7mU', '15900461'),
-        ('weixin_openid_o0w175SPqpCVRcp7x1XvnX4qpIvI', '19659040'),
-        ('weixin_openid_o0w175cOnguapyWIrDrHkOWl4oFQ', '31210128'),
-        ('weixin_openid_o0w175UXYId-o71e1Q3SOheYNteQ', '33099722'),
-        ('weixin_openid_o0w175QQ5b42AtOe50bchrFgcttA', ''),
-        ('weixin_openid_o0w175bgaPlfLsp3YLDKWqLWtXX8', '35371534'),
-        ('weixin_openid_o0w175eRpvbmV6nOhM1VTyyLICWA', '30488803'),
-        ('weixin_openid_o0w175TZYvG47pQkOjyJFoxQuqsw', '')
-    ]
-    video_df = pd.read_csv('./data/videoids.csv')
-    videoid_list = video_df['videoid'].tolist()
-    for mid, uid in user:
-        video_ids = random.sample(videoid_list, 1000)
-        start_time = time.time()
-        filter_ = FilterVideos(request_id=f'{mid} - {uid}', app_type=0, mid=mid, uid=uid, video_ids=video_ids)
-        res = filter_.filter_videos_new()
-        print(f"res: {res}\nexecute_time: {(time.time() - start_time) * 1000}")
+    pass
+    # video_ids = [17736990, 17734880, 17734759, 17726977]
+    # video_tag_dict = {17734880: ['国庆节'], 17726977: ['圣诞节'], 17736990: ['早上好'], 17734759: ['晚上好']}
+    # tags_rule = {'早上好': {'start': 0, 'end': 9}, '中午好': {'start': 11, 'end': 13}, '冬至': {'start': 0, 'end': 9}, '祝福': {'start': 0, 'end': 23}}
+    # f = FilterVideos("request_id", "app_type", video_ids)
+    # f.filter_videos_with_tags_rule(video_ids, video_tag_dict, tags_rule)
+    # user = [
+    #     ('weixin_openid_o0w175fDc8pNnywrYN49E341tKfI', ''),
+    #     ('weixin_openid_o0w175YwC3hStzcR5DAQdbgzdMeI', ''),
+    #     ('weixin_openid_o0w175ftZDl6VJVDx9la3WVPh7mU', '15900461'),
+    #     ('weixin_openid_o0w175SPqpCVRcp7x1XvnX4qpIvI', '19659040'),
+    #     ('weixin_openid_o0w175cOnguapyWIrDrHkOWl4oFQ', '31210128'),
+    #     ('weixin_openid_o0w175UXYId-o71e1Q3SOheYNteQ', '33099722'),
+    #     ('weixin_openid_o0w175QQ5b42AtOe50bchrFgcttA', ''),
+    #     ('weixin_openid_o0w175bgaPlfLsp3YLDKWqLWtXX8', '35371534'),
+    #     ('weixin_openid_o0w175eRpvbmV6nOhM1VTyyLICWA', '30488803'),
+    #     ('weixin_openid_o0w175TZYvG47pQkOjyJFoxQuqsw', '')
+    # ]
+    # video_df = pd.read_csv('./data/videoids.csv')
+    # videoid_list = video_df['videoid'].tolist()
+    # for mid, uid in user:
+    #     video_ids = random.sample(videoid_list, 1000)
+    #     start_time = time.time()
+    #     filter_ = FilterVideos(request_id=f'{mid} - {uid}', app_type=0, mid=mid, uid=uid, video_ids=video_ids)
+    #     res = filter_.filter_videos_new()
+    #     print(f"res: {res}\nexecute_time: {(time.time() - start_time) * 1000}")
     # filter_.filter_video_status(video_ids=[1, 3, 5])
 
     # videos = [{'videoId': 9034659, 'flowPool': '3#11#3#1637824188547'}, {'videoId': 9035052, 'flowPool': '3#11#3#1637824172827'}]

+ 49 - 9
video_recall.py

@@ -534,6 +534,8 @@ class PoolRecall(object):
                     video_mapping[video_id] = [flow_pool]
                 else:
                     video_mapping[video_id].append(flow_pool)
+            # todo zhangbo 获取tags
+            video_tag_dict = self.get_video_tags(video_ids)
             # 过滤
             filter_ = FilterVideos(request_id=self.request_id,
                                    app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids,
@@ -579,7 +581,8 @@ class PoolRecall(object):
                         flow_pool_recall_result.append(
                             {'videoId': video_id, 'flowPool': flow_pool,
                              'rovScore': video_score[video_id], 'pushFrom': config_.PUSH_FROM['flow_recall'],
-                             'abCode': self.ab_code, 'flow_pool_abtest_group': flow_pool_abtest_group}
+                             'abCode': self.ab_code, 'flow_pool_abtest_group': flow_pool_abtest_group,
+                             "tags": video_tag_dict[video_id]}
                         )
 
                         flow_pool_recall_videos.append(video_id)
@@ -795,6 +798,8 @@ class PoolRecall(object):
                     video_mapping[video_id] = [flow_pool]
                 else:
                     video_mapping[video_id].append(flow_pool)
+            # todo zhangbo 获取tags
+            video_tag_dict = self.get_video_tags(video_ids)
             # 过滤
             filter_ = FilterVideos(request_id=self.request_id,
                                    app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids,
@@ -840,7 +845,8 @@ class PoolRecall(object):
                         flow_pool_recall_result.append(
                             {'videoId': video_id, 'flowPool': flow_pool, 'level': level,
                              'rovScore': random.uniform(0, 100), 'pushFrom': config_.PUSH_FROM['flow_recall'],
-                             'abCode': self.ab_code, 'flow_pool_abtest_group': flow_pool_abtest_group}
+                             'abCode': self.ab_code, 'flow_pool_abtest_group': flow_pool_abtest_group,
+                             "tags": video_tag_dict[video_id]}
                         )
 
                         flow_pool_recall_videos.append(video_id)
@@ -1090,6 +1096,8 @@ class PoolRecall(object):
                     check_result_items.append([video_id, flow_pool, score])
             check_result_items = sorted(check_result_items, key=lambda x: x[2], reverse=True)
             to_filter_videos = [item[0] for item in check_result_items[:get_size]]
+            # todo zhangbo 获取tags
+            video_tag_dict = self.get_video_tags(to_filter_videos)
             # 过滤
             filter_ = FilterVideos(request_id=self.request_id,
                                    app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=to_filter_videos,
@@ -1113,7 +1121,8 @@ class PoolRecall(object):
                         {'videoId': video_id, 'flowPool': check_result_mapping[video_id][0], 'level': level,
                          'rovScore': check_result_mapping[video_id][1],
                          'pushFrom': config_.PUSH_FROM['flow_recall'],
-                         'abCode': self.ab_code, 'flow_pool_abtest_group': flow_pool_abtest_group}
+                         'abCode': self.ab_code, 'flow_pool_abtest_group': flow_pool_abtest_group,
+                         "tags": video_tag_dict[video_id]}
                     )
 
         return flow_pool_recall_result[:size], flow_pool_recall_process
@@ -2861,6 +2870,10 @@ class PoolRecall(object):
                 video_id = int(value[0])
                 video_ids.append(video_id)
                 video_score[video_id] = value[1]
+
+            # todo zhangbo 增加tags特征
+            video_tag_dict = self.get_video_tags(video_ids)
+
             # 过滤
             filter_ = FilterVideos(request_id=self.request_id,
                                    app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=video_ids,
@@ -2877,7 +2890,8 @@ class PoolRecall(object):
             if filtered_result:
                 # 添加视频源参数 pushFrom, abCode
                 temp_result = [{'videoId': int(item), 'rovScore': video_score[int(item)],
-                                'pushFrom': push_from, 'abCode': self.ab_code}
+                                'pushFrom': push_from, 'abCode': self.ab_code,
+                                'tags': video_tag_dict[item]}
                                for item in filtered_result if video_score.get(int(item)) is not None]
                 pool_recall_result.extend(temp_result)
             # else:
@@ -3152,6 +3166,18 @@ class PoolRecall(object):
         #print("recall_key:", recall_key)
         data = self.redis_helper.get_data_from_redis(key_name=recall_key)
 
+        # todo zhangbo 获取tags
+        video_ids_4tags = []
+        if data is not None:
+            json_result = json.loads(data)
+            for per_item in json_result:
+                try:
+                    vid = int(per_item[0])
+                    video_ids_4tags.append(vid)
+                except Exception as e:
+                    continue
+        video_tag_dict = self.get_video_tags(video_ids_4tags)
+
         #print(data)
         recall_result = []
         recall_dict  = {}
@@ -3165,7 +3191,7 @@ class PoolRecall(object):
                     video_ids.append(vid)
                     recall_dict[vid] = {'videoId': vid, 'flowPool': '',
                          'rovScore': per_item[1], 'pushFrom': config_.PUSH_FROM['sim_hot_vid_recall'],
-                         'abCode': self.ab_code}
+                         'abCode': self.ab_code, "tags": video_tag_dict[vid]}
                 except Exception as e:
                     continue
         if len(video_ids)<=0:
@@ -3837,6 +3863,18 @@ class PoolRecall(object):
         #print("recall_key:", recall_key)
         data = self.redis_helper.get_data_from_redis(key_name=recall_key)
 
+        # todo zhangbo 获取tags
+        video_ids_4tag = []
+        if data is not None and data!="" :
+            try:
+                json_result = json.loads(data)
+                for per_item in json_result:
+                    vid = int(per_item[0])
+                    video_ids_4tag.append(vid)
+            except Exception as e:
+                video_ids_4tag = []
+        video_tag_dict = self.get_video_tags(video_ids_4tag)
+
         #print(data)
         recall_result = []
         recall_dict  = {}
@@ -3849,7 +3887,7 @@ class PoolRecall(object):
                     video_ids.append(vid)
                     recall_dict[vid] = {'videoId': vid, 'flowPool': '',
                          'rovScore': float(per_item[1]), 'pushFrom': config_.PUSH_FROM['return_video_recall'],
-                         'abCode': self.ab_code}
+                         'abCode': self.ab_code, "tags": video_tag_dict[vid]}
             except Exception as e:
                 return recall_result
         if len(video_ids)<=0:
@@ -3966,10 +4004,10 @@ class PoolRecall(object):
             else:
                 data2_list = []
             data_for_filter = [i for i in data_for_filter if len(i) > 0]
-
+            # data_for_filter.append([17736990, 17734880, 17734759, 17726977])
+            # data1_list.extend([17736990, 17734880, 17734759, 17726977])
             # 3.9 获取item的tag特征
             video_tag_dict = self.get_video_tags(list(set(data1_list) | set(data2_list)))
-
             # 4 视频过滤
             filter_ = FilterVideos(request_id=self.request_id,
                                    app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=None,
@@ -3979,7 +4017,8 @@ class PoolRecall(object):
                                    videos_with_risk=self.videos_with_risk
                                    )
             region_code = self.get_region_code()
-            t = [gevent.spawn(filter_.filter_videos_for_group, region_code, videos, video_tag_dict, self.tags_filter_rule) for videos in data_for_filter]
+            t = [gevent.spawn(filter_.filter_videos_for_group, region_code, videos, video_tag_dict,
+                              self.tags_filter_rule, self.tags_filter_flag) for videos in data_for_filter]
             gevent.joinall(t)
             result_list = [i.get() for i in t if i.get() is not None and len(i.get()) > 0]
             # 5 返回结果
@@ -3992,6 +4031,7 @@ class PoolRecall(object):
                         'rovScore': 0.0, 'pushFrom': config_.PUSH_FROM['recall_strategy_trend_v1'],
                         'abCode': self.ab_code
                     })
+            print("results:" + str(results))
             return results
         except Exception as e:
             log_.error("error in recall_strategy_trend_v1:{}".format(e))