zhangbo 1 éve
szülő
commit
d8c915de93
4 módosított fájl, 218 hozzáadás és 9 törlés
  1. 2 2
      config.py
  2. 45 2
      parameter_update.py
  3. 145 1
      utils.py
  4. 26 4
      video_recall.py

+ 2 - 2
config.py

@@ -2014,8 +2014,8 @@ class DevelopmentConfig(BaseConfig):
 
     # 用户已观看视频过滤 & 视频审核条件过滤 & 是否进入老年人社区过滤 & 话题状态过滤 接口地址
     # 参数types: 1-已观看 2-视频状态 3-是否进入老年人社区过滤 4-话题状态
-    VIDEO_FILTER_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/filterVideos'
-    # VIDEO_FILTER_URL = 'http://recommend-common-internal.piaoquantv.com/longvideoapi/openapi/recommend/filterVideos'
+    # VIDEO_FILTER_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/filterVideos'
+    VIDEO_FILTER_URL = 'http://recommend-common-internal.piaoquantv.com/longvideoapi/openapi/recommend/filterVideos'
 
     # 获取视频在流量池中的剩余可分发数接口地址
     GET_REMAIN_VIEW_COUNT_URL = 'http://testapi-internal.piaoquantv.com/flowpool/video/remainViewCount'

+ 45 - 2
parameter_update.py

@@ -2,15 +2,19 @@ from my_utils import parse_json_for_risk_rule
 from my_utils import parse_json_for_risk_videos
 from db_helper import RedisHelper
 from config import set_config
-
-
+import json
+from log import Log
 config_ = set_config()
+log_ = Log()
 
 RISK_SHIELD_FILTER_RULE_V1_JSON = "RISK_SHIELD_FILTER_RULE_V1_JSON"
 RISK_SHIELD_FILTER_VIDEO_V1_STR = "RISK_SHIELD_FILTER_VIDEO_V1_STR"
 RISK_SHIELD_FILTER_EXPANSION_FACTOR_INT = "RISK_SHIELD_FILTER_EXPANSION_FACTOR_INT"
 RISK_SHIELD_FILTER_FLAG_BOOL = "RISK_SHIELD_FILTER_FLAG_BOOL"
 
+# Redis key names used by the tag-based filter: the on/off switch (parsed as a
+# bool by param_update_filter_flags) and the JSON rule set (parsed by
+# param_update_rule).
+TAGS_FILTER_FLAG_BOOL = "TAGS_FILTER_FLAG_BOOL"
+TAGS_FILTER_RULE_V1_JSON = "TAGS_FILTER_RULE_V1_JSON"
+
 def param_update_risk_rule() -> dict:
     """
     定时更新风险过滤的规则
@@ -73,6 +77,42 @@ def param_update_risk_filter_flag() -> bool:
             data = False
     return data
 
+def param_update_filter_flags() -> list:
+    """Fetch every filter switch from Redis with a single batched read.
+
+    Reads RISK_SHIELD_FILTER_FLAG_BOOL and TAGS_FILTER_FLAG_BOOL through
+    ``RedisHelper.get_batch_key``. A switch is on only when its stored value
+    is the text "true" (case-insensitive). A missing key, a ``None`` batch
+    result, a short reply or a non-text value all leave the switch off.
+
+    Returns:
+        A two-element list ``[risk_filter_flag, tags_filter_flag]`` of bool,
+        in the same order as the keys above.
+    """
+    key_list = [
+        RISK_SHIELD_FILTER_FLAG_BOOL,
+        TAGS_FILTER_FLAG_BOOL,
+    ]
+    values = RedisHelper().get_batch_key(name_list=key_list)
+
+    # Default every switch to off so a partial or empty reply stays safe.
+    flags = [False] * len(key_list)
+    if not values:
+        return flags
+
+    for idx, raw in zip(range(len(key_list)), values):
+        # NOTE(review): whether the helper decodes replies is not visible
+        # here; accept bytes too so b"true" is not silently read as off.
+        if isinstance(raw, bytes):
+            raw = raw.decode("utf-8", errors="ignore")
+        flags[idx] = isinstance(raw, str) and raw.lower() == "true"
+    return flags
+
+def param_update_rule(redis_helper: RedisHelper):
+    """Load the tag-filter rule set stored under TAGS_FILTER_RULE_V1_JSON.
+
+    Args:
+        redis_helper: helper whose ``get_data_from_redis(key_name=...)``
+            returns the raw JSON text, or ``None`` when the key is absent.
+
+    Returns:
+        The decoded rules as a dict. An empty dict is returned when the key
+        is missing, the payload is not valid JSON, or the payload decodes to
+        something other than a JSON object (callers treat the result as a
+        dict, so e.g. ``null`` or a list must not leak through).
+    """
+    raw = redis_helper.get_data_from_redis(key_name=TAGS_FILTER_RULE_V1_JSON)
+    if raw is None:
+        return {}
+    try:
+        data = json.loads(raw)
+    except Exception as e:
+        # Best effort: a broken rule payload must not break the caller.
+        log_.error("{}: parse json is wrong with in param_update_rule:{}".format(e, raw))
+        return {}
+    if not isinstance(data, dict):
+        log_.error("tags filter rule is not a json object in param_update_rule:{}".format(raw))
+        return {}
+    return data
+
+
 if __name__ == '__main__':
     pass
     d1 = param_update_risk_rule()
@@ -83,5 +123,8 @@ if __name__ == '__main__':
     d4 = param_update_risk_filter_flag()
     print(d3, type(d3))
     print(d4, type(d4))
+    redis_helper = RedisHelper()
+    d5 = param_update_rule(redis_helper)
+    print(d5, type(d5))
 
 

+ 145 - 1
utils.py

@@ -892,7 +892,7 @@ class FilterVideos(object):
         else:
             return video_ids[:min(self.force_truncation, len(video_ids))]
 
-    def filter_videos_for_group(self, region_code=None, videos=None):
+    def filter_videos_for_group(self, region_code=None, videos=None, video_tag_dict=None, tags_rule=None):
         """视频过滤"""
         videos_filtered = self.filter_videos_with_risk_video(videos, self.app_type, region_code)
         filtered_pre_result = self.filter_video_previewed(videos_filtered)
@@ -901,9 +901,153 @@ class FilterVideos(object):
         filtered_viewed_result = self.filter_video_viewed_status(video_ids=filtered_pre_result)
         if not filtered_viewed_result:
             return None
+
         filtered_viewed_videos = [int(video_id) for video_id in filtered_viewed_result]
         return filtered_viewed_videos
 
+    def filter_videos_with_tags_rule(self, video_ids: list, video_tag_dict: dict, tags_rule: dict):
+        """Drop videos whose tags are outside their allowed hours right now.
+
+        Args:
+            video_ids: candidate video ids, in ranking order.
+            video_tag_dict: mapping of video id -> list of tag strings. Ids
+                that are missing from the mapping are treated as untagged.
+            tags_rule: mapping of tag -> ``{"start": hour, "end": hour}``.
+                NOTE(review): the pair is read as the inclusive hour window in
+                which the tag may be shown; ``start > end`` is read as a
+                window that wraps past midnight - confirm with the rule owner.
+
+        Returns:
+            ``video_ids`` without the videos that carry at least one tag whose
+            window does not contain the current hour. Order is preserved. The
+            input is returned unchanged when no tag is currently blocked.
+        """
+        if not video_ids:
+            return video_ids
+        tags_rule = tags_rule or {}
+        video_tag_dict = video_tag_dict or {}
+
+        # 1. Current hour and today's holiday name (empty string if none).
+        # A single now() call keeps hour and date consistent around midnight.
+        now = datetime.now()
+        hour = now.hour
+        holiday_cn = HOLIDAY_KV.get(now.strftime('%Y-%m-%d'), "")
+
+        def outside_window(rule, default_end):
+            """Return True when the current hour is outside the rule's window."""
+            start = rule.get("start", 0)
+            end = rule.get("end", default_end)
+            if start <= end:
+                return hour < start or hour > end
+            # Wrapping window such as 21 -> 5: blocked strictly in between.
+            return end < hour < start
+
+        # 2. Work out which tags are blocked: daily greetings first, then
+        # today's holiday. A tag without a rule falls back to the defaults.
+        tag_days = ["早上好", "中午好", "下午好", "晚上好", "晚安"]
+        filter_tags = [
+            tag_day for tag_day in tag_days
+            if outside_window(tags_rule.get(tag_day) or {}, 23)
+        ]
+        if holiday_cn and outside_window(tags_rule.get(holiday_cn) or {}, 9):
+            filter_tags.append(holiday_cn)
+        if not filter_tags:
+            return video_ids
+
+        # 3. Keep only the videos that carry none of the blocked tags.
+        blocked = set(filter_tags)
+        return [
+            video_id for video_id in video_ids
+            if blocked.isdisjoint(video_tag_dict.get(video_id) or [])
+        ]
+
+# Calendar date ("YYYY-MM-DD") -> name of the festival / memorial day / solar
+# term that falls on it. Each date maps to exactly one name, and the table only
+# covers 2024 and 2025.
+HOLIDAY_KV = {
+    # ---- 2024: festivals and memorial days ----
+    "2024-01-01": "元旦",
+    "2024-01-18": "腊八节",
+    "2024-02-02": "小年",
+    "2024-02-03": "小年",
+    "2024-02-09": "除夕",
+    "2024-02-10": "春节",
+    "2024-02-14": "情人节",
+    "2024-02-24": "元宵节",
+    "2024-03-11": "龙抬头",
+    "2024-03-08": "妇女节",
+    "2024-05-01": "劳动节",
+    "2024-05-12": "母亲节",
+    "2024-06-01": "儿童节",
+    "2024-06-10": "端午节",
+    "2024-06-16": "父亲节",
+    "2024-07-01": "建党节",
+    "2024-07-07": "七七事变",
+    "2024-08-01": "建军节",
+    "2024-08-10": "七夕节",
+    "2024-08-18": "中元节",
+    "2024-09-17": "中秋节",
+    "2024-09-09": "毛主席逝世",
+    "2024-10-01": "国庆节",
+    "2024-10-11": "重阳节",
+    "2024-11-28": "感恩节",
+    "2024-12-13": "公祭日",
+    "2024-12-24": "平安夜",
+    "2024-12-25": "圣诞节",
+    "2024-12-26": "毛主席诞辰",
+    # ---- 2024: solar terms ----
+    "2024-01-06": "小寒",
+    "2024-01-20": "大寒",
+    "2024-02-04": "立春",
+    "2024-02-19": "雨水",
+    "2024-03-05": "惊蛰",
+    "2024-03-20": "春分",
+    "2024-04-04": "清明",
+    "2024-04-19": "谷雨",
+    "2024-05-05": "立夏",
+    "2024-05-20": "小满",
+    "2024-06-05": "芒种",
+    "2024-06-21": "夏至",
+    "2024-07-06": "小暑",
+    "2024-07-22": "大暑",
+    "2024-08-07": "立秋",
+    "2024-08-22": "处暑",
+    "2024-09-07": "白露",
+    "2024-09-22": "秋分",
+    "2024-10-08": "寒露",
+    "2024-10-23": "霜降",
+    "2024-11-07": "立冬",
+    "2024-11-22": "小雪",
+    "2024-12-06": "大雪",
+    "2024-12-21": "冬至",
+    # ---- 2025: festivals and memorial days ----
+    "2025-01-01": "元旦",
+    "2025-01-07": "腊八节",
+    "2025-01-22": "小年",
+    "2025-01-23": "小年",
+    "2025-01-28": "除夕",
+    "2025-01-29": "春节",
+    "2025-02-14": "情人节",
+    # Lantern Festival is 14 days after Spring Festival (2025-01-29).
+    "2025-02-12": "元宵节",
+    "2025-03-01": "龙抬头",
+    "2025-03-08": "妇女节",
+    "2025-05-01": "劳动节",
+    "2025-05-11": "母亲节",
+    "2025-06-01": "儿童节",
+    "2025-05-31": "端午节",
+    "2025-06-15": "父亲节",
+    "2025-07-01": "建党节",
+    # NOTE(review): the year 2054 looks like a typo for 2025, but 2025-07-07
+    # is already used by the solar term below and a date can hold only one
+    # name - decide which one should win before correcting this key.
+    "2054-07-07": "七七事变",
+    "2025-08-01": "建军节",
+    "2025-08-29": "七夕节",
+    "2025-09-06": "中元节",
+    "2025-10-06": "中秋节",
+    "2025-09-09": "毛主席逝世",
+    "2025-10-01": "国庆节",
+    "2025-10-29": "重阳节",
+    "2025-11-27": "感恩节",
+    "2025-12-13": "公祭日",
+    "2025-12-24": "平安夜",
+    "2025-12-25": "圣诞节",
+    "2025-12-26": "毛主席诞辰",
+    # ---- 2025: solar terms ----
+    "2025-01-05": "小寒",
+    "2025-01-20": "大寒",
+    "2025-02-03": "立春",
+    "2025-02-18": "雨水",
+    "2025-03-05": "惊蛰",
+    "2025-03-20": "春分",
+    "2025-04-04": "清明",
+    "2025-04-20": "谷雨",
+    "2025-05-05": "立夏",
+    "2025-05-21": "小满",
+    "2025-06-05": "芒种",
+    "2025-06-21": "夏至",
+    "2025-07-07": "小暑",
+    "2025-07-22": "大暑",
+    "2025-08-07": "立秋",
+    "2025-08-23": "处暑",
+    "2025-09-07": "白露",
+    "2025-09-23": "秋分",
+    "2025-10-08": "寒露",
+    "2025-10-23": "霜降",
+    "2025-11-07": "立冬",
+    "2025-11-22": "小雪",
+    "2025-12-07": "大雪",
+    "2025-12-21": "冬至",
+}
+
 if __name__ == '__main__':
     user = [
         ('weixin_openid_o0w175fDc8pNnywrYN49E341tKfI', ''),

+ 26 - 4
video_recall.py

@@ -11,9 +11,10 @@ import gevent
 import json
 import sys
 from parameter_update import param_update_expansion_factor
-from parameter_update import param_update_risk_filter_flag
+from parameter_update import param_update_filter_flags
 from parameter_update import param_update_risk_rule
 from parameter_update import param_update_risk_videos
+from parameter_update import param_update_rule
 
 log_ = Log()
 config_ = set_config()
@@ -52,14 +53,17 @@ class PoolRecall(object):
         self.h_rule_key = h_rule_key
 
         self.expansion_factor = param_update_expansion_factor()
-        self.risk_filter_flag = param_update_risk_filter_flag()
+        [self.risk_filter_flag, self.tags_filter_flag] = param_update_filter_flags()
         if self.risk_filter_flag:
             self.app_region_filtered = param_update_risk_rule()
             self.videos_with_risk = param_update_risk_videos()
         else:
             self.app_region_filtered = {}
             self.videos_with_risk = []
-
+        if self.tags_filter_flag:
+            self.tags_filter_rule = param_update_rule(self.redis_helper)
+        else:
+            self.tags_filter_rule = {}
 
 
     def copy_redis_zset_data(self, from_key_name, to_key_name):
@@ -3959,7 +3963,13 @@ class PoolRecall(object):
                 if len(data2_list) > 0:
                     data_for_filter.extend(
                         [data2_list[i:i + group_size] for i in range(0, len(data2_list), group_size)])
+            else:
+                data2_list = []
             data_for_filter = [i for i in data_for_filter if len(i) > 0]
+
+            # 3.9 获取item的tag特征
+            video_tag_dict = self.get_video_tags(list(set(data1_list) | set(data2_list)))
+
             # 4 视频过滤
             filter_ = FilterVideos(request_id=self.request_id,
                                    app_type=self.app_type, mid=self.mid, uid=self.uid, video_ids=None,
@@ -3969,7 +3979,7 @@ class PoolRecall(object):
                                    videos_with_risk=self.videos_with_risk
                                    )
             region_code = self.get_region_code()
-            t = [gevent.spawn(filter_.filter_videos_for_group, region_code, videos) for videos in data_for_filter]
+            t = [gevent.spawn(filter_.filter_videos_for_group, region_code, videos, video_tag_dict, self.tags_filter_rule) for videos in data_for_filter]
             gevent.joinall(t)
             result_list = [i.get() for i in t if i.get() is not None and len(i.get()) > 0]
             # 5 返回结果
@@ -3977,6 +3987,7 @@ class PoolRecall(object):
             for g in result_list:
                 for v in g:
                     results.append({
+                        'tags': video_tag_dict[v],
                         'videoId': v, 'flowPool': '',
                         'rovScore': 0.0, 'pushFrom': config_.PUSH_FROM['recall_strategy_trend_v1'],
                         'abCode': self.ab_code
@@ -3985,4 +3996,15 @@ class PoolRecall(object):
         except Exception as e:
             log_.error("error in recall_strategy_trend_v1:{}".format(e))
         return []
+    def get_video_tags(self, video_ids) -> dict:
+        """Look up the tag list of each video with one batched Redis read.
+
+        Tags are stored per video under ``alg_recsys_video_tags_<video_id>``
+        as a comma-separated string.
+
+        Args:
+            video_ids: iterable of video ids to look up.
+
+        Returns:
+            Dict mapping every requested id to its list of tags. Ids with no
+            stored value, an empty value, or no reply at all map to ``[]``, so
+            callers can index the result with any requested id.
+        """
+        REDIS_PREFIX = "alg_recsys_video_tags_"
+        video_ids = list(video_ids or [])
+        video_tag_dict = {video_id: [] for video_id in video_ids}
+        if not video_ids:
+            # Nothing to look up; skip the Redis round trip.
+            return video_tag_dict
+
+        redis_keys = [REDIS_PREFIX + str(video_id) for video_id in video_ids]
+        video_tags = self.redis_helper.get_batch_key(redis_keys)
+        if not video_tags:
+            # The helper can hand back None; treat it as "no tags known".
+            return video_tag_dict
+
+        # Replies are paired with the ids by position, as the keys were built.
+        for video_id, tags_str in zip(video_ids, video_tags):
+            # NOTE(review): whether the helper decodes replies is not visible
+            # here; decode bytes so str() does not produce "b'...'" tags.
+            if isinstance(tags_str, bytes):
+                tags_str = tags_str.decode("utf-8", errors="ignore")
+            if tags_str:
+                video_tag_dict[video_id] = str(tags_str).split(",")
+        return video_tag_dict