liqian 2 years ago
parent
commit
f6f505019b
6 changed files with 327 additions and 202 deletions
  1. + 21 - 49
      check_video_limit_distribute.py
  2. + 111 - 21
      config.py
  3. + 96 - 46
      region_rule_rank_h.py
  4. + 40 - 34
      region_rule_rank_h_by24h.py
  5. + 37 - 40
      rule_rank_h_by_24h.py
  6. + 22 - 12
      videos_filter.py

+ 21 - 49
check_video_limit_distribute.py

@@ -155,55 +155,27 @@ def check_region_videos():
 
     for app_type, params in rule_params.items():
         log_.info(f"app_type = {app_type}")
-        for data_key, data_param in params['data_params'].items():
-            log_.info(f"data_key = {data_key}, data_param = {data_param}")
-            for rule_key, rule_param in params['rule_params'].items():
-                log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
-                task_list = [
-                    gevent.spawn(process_with_region, app_type, data_key, rule_key, region,
-                                 stop_distribute_video_id_list, now_date, now_h)
-                    for region in region_code_list
-                ]
-                gevent.joinall(task_list)
-
-                # for region in region_code_list:
-                #     log_.info(f"app_type = {app_type}, data_key = {data_key}, rule_key = {rule_key}, region = {region}")
-                #     # 将已超分发视频加入到地域小时级线上过滤应用列表中
-                #     redis_helper.add_data_with_set(
-                #         key_name=f"{config_.REGION_H_VIDEO_FILER}{region}.{app_type}.{data_key}.{rule_key}",
-                #         values=stop_distribute_video_id_list,
-                #         expire_time=2 * 3600
-                #     )
-                #     # 将已超分发视频加入到地域分组24h的数据线上过滤应用列表中
-                #     redis_helper.add_data_with_set(
-                #         key_name=f"{config_.REGION_H_VIDEO_FILER_24H}{region}.{app_type}.{data_key}.{rule_key}",
-                #         values=stop_distribute_video_id_list,
-                #         expire_time=2 * 3600
-                #     )
-                #     # 将已超分发视频加入到不区分相对24h线上过滤应用列表中
-                #     redis_helper.add_data_with_set(
-                #         key_name=f"{config_.H_VIDEO_FILER_24H}{region}.{app_type}.{data_key}.{rule_key}",
-                #         values=stop_distribute_video_id_list,
-                #         expire_time=2 * 3600
-                #     )
-                #     # 将已超分发视频 移除 大列表
-                #     key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}" \
-                #                f"{region}.{app_type}.{data_key}.{rule_key}." \
-                #                f"{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
-                #     if not redis_helper.key_exists(key_name=key_name):
-                #         if now_h == 0:
-                #             redis_date = now_date - datetime.timedelta(days=1)
-                #             redis_h = 23
-                #         else:
-                #             redis_date = now_date
-                #             redis_h = now_h - 1
-                #         key_name = f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}" \
-                #                    f"{region}.{app_type}.{data_key}.{rule_key}." \
-                #                    f"{datetime.datetime.strftime(redis_date, '%Y%m%d')}.{redis_h}"
-                #     redis_helper.remove_value_from_zset(key_name=key_name, value=stop_distribute_video_id_list)
-                #
-                #     log_.info(f"app_type = {app_type}, data_key = {data_key}, rule_key = {rule_key}, region = {region} "
-                #               f"videos check end!")
+        for param in params.get('params_list'):
+            data_key = param.get('data')
+            rule_key = param.get('rule')
+            log_.info(f"data_key = {data_key}, rule_key = {rule_key}")
+            task_list = [
+                gevent.spawn(process_with_region, app_type, data_key, rule_key, region,
+                             stop_distribute_video_id_list, now_date, now_h)
+                for region in region_code_list
+            ]
+            gevent.joinall(task_list)
+
+        # for data_key, data_param in params['data_params'].items():
+        #     log_.info(f"data_key = {data_key}, data_param = {data_param}")
+        #     for rule_key, rule_param in params['rule_params'].items():
+        #         log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
+        #         task_list = [
+        #             gevent.spawn(process_with_region, app_type, data_key, rule_key, region,
+        #                          stop_distribute_video_id_list, now_date, now_h)
+        #             for region in region_code_list
+        #         ]
+        #         gevent.joinall(task_list)
 
     # 将已超分发视频 移除 原始大列表
     key_name = f"{config_.RECALL_KEY_NAME_PREFIX}{datetime.datetime.strftime(now_date, '%Y%m%d')}"

+ 111 - 21
config.py

@@ -171,7 +171,11 @@ class BaseConfig(object):
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
                 'data2': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO']],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+                {'data': 'data2', 'rule': 'rule2'},
+            ],
         },
         APP_TYPE['LONG_VIDEO']: {
             'rule_params': {
@@ -183,7 +187,12 @@ class BaseConfig(object):
                 # 'data2': [APP_TYPE['LONG_VIDEO'], ],
                 'data3': [APP_TYPE['VLOG'], APP_TYPE['LONG_VIDEO'], ],
                 'data4': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO']],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+                {'data': 'data3', 'rule': 'rule2'},
+                {'data': 'data4', 'rule': 'rule2'},
+            ],
         },
         APP_TYPE['LOVE_LIVE']: {
             'rule_params': {
@@ -193,7 +202,10 @@ class BaseConfig(object):
             },
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+            ],
         },
         APP_TYPE['SHORT_VIDEO']: {
             'rule_params': {
@@ -202,7 +214,10 @@ class BaseConfig(object):
             },
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+            ],
         },
         APP_TYPE['LAO_HAO_KAN_VIDEO']: {
             'rule_params': {
@@ -211,7 +226,10 @@ class BaseConfig(object):
             },
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+            ],
         },
         APP_TYPE['ZUI_JING_QI']: {
             'rule_params': {
@@ -223,7 +241,12 @@ class BaseConfig(object):
                 'data2': [APP_TYPE['VLOG'], APP_TYPE['ZUI_JING_QI']],
                 'data3': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO'],
                           APP_TYPE['ZUI_JING_QI']],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+                {'data': 'data2', 'rule': 'rule2'},
+                {'data': 'data3', 'rule': 'rule2'},
+            ],
         },
         APP_TYPE['APP']: {
             'rule_params': {
@@ -232,7 +255,15 @@ class BaseConfig(object):
             },
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
-            }
+                'data2': [APP_TYPE['VLOG'], APP_TYPE['APP']],
+                'data3': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO'],
+                          APP_TYPE['APP']],
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+                # {'data': 'data2', 'rule': 'rule2'},
+                # {'data': 'data3', 'rule': 'rule2'},
+            ],
         },
     }
 
@@ -250,7 +281,11 @@ class BaseConfig(object):
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
                 'data2': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO']],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+                {'data': 'data2', 'rule': 'rule2'},
+            ],
         },
         APP_TYPE['LONG_VIDEO']: {
             'rule_params': {
@@ -262,7 +297,12 @@ class BaseConfig(object):
                 # 'data2': [APP_TYPE['LONG_VIDEO'], ],
                 'data3': [APP_TYPE['VLOG'], APP_TYPE['LONG_VIDEO'], ],
                 'data4': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO']],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+                {'data': 'data3', 'rule': 'rule2'},
+                {'data': 'data4', 'rule': 'rule2'},
+            ],
         },
         APP_TYPE['LOVE_LIVE']: {
             'rule_params': {
@@ -271,7 +311,10 @@ class BaseConfig(object):
             },
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+            ],
         },
         APP_TYPE['SHORT_VIDEO']: {
             'rule_params': {
@@ -280,7 +323,10 @@ class BaseConfig(object):
             },
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+            ],
         },
         APP_TYPE['LAO_HAO_KAN_VIDEO']: {
             'rule_params': {
@@ -289,7 +335,10 @@ class BaseConfig(object):
             },
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+            ],
         },
         APP_TYPE['ZUI_JING_QI']: {
             'rule_params': {
@@ -301,16 +350,31 @@ class BaseConfig(object):
                 'data2': [APP_TYPE['VLOG'], APP_TYPE['ZUI_JING_QI']],
                 'data3': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO'],
                           APP_TYPE['ZUI_JING_QI']],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+                {'data': 'data2', 'rule': 'rule2'},
+                {'data': 'data3', 'rule': 'rule2'},
+            ],
         },
         APP_TYPE['APP']: {
             'rule_params': {
                 'rule2': {'view_type': 'video-show', 'return_count': 21, 'score_rule': 0,
                           'platform_return_rate': 0.001},
+                'rule3': {'view_type': 'preview', 'return_count': 21, 'score_rule': 0,
+                          'platform_return_rate': 0.001},
             },
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
-            }
+                'data2': [APP_TYPE['VLOG'], APP_TYPE['APP']],
+                'data3': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO'],
+                          APP_TYPE['APP']],
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule2'},
+                # {'data': 'data2', 'rule': 'rule3'},
+                # {'data': 'data3', 'rule': 'rule3'},
+            ],
         },
     }
 
@@ -329,7 +393,11 @@ class BaseConfig(object):
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
                 'data2': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO']],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule3'},
+                {'data': 'data2', 'rule': 'rule3'},
+            ],
         },
         APP_TYPE['LONG_VIDEO']: {
             'rule_params': {
@@ -342,7 +410,12 @@ class BaseConfig(object):
                 # 'data2': [APP_TYPE['LONG_VIDEO'], ],
                 'data3': [APP_TYPE['VLOG'], APP_TYPE['LONG_VIDEO'], ],
                 'data4': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO']],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule3'},
+                {'data': 'data3', 'rule': 'rule3'},
+                {'data': 'data4', 'rule': 'rule3'},
+            ],
         },
         APP_TYPE['LOVE_LIVE']: {
             'rule_params': {
@@ -352,7 +425,10 @@ class BaseConfig(object):
             },
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule3'},
+            ],
         },
         APP_TYPE['SHORT_VIDEO']: {
             'rule_params': {
@@ -362,7 +438,10 @@ class BaseConfig(object):
             },
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule3'},
+            ],
         },
         APP_TYPE['LAO_HAO_KAN_VIDEO']: {
             'rule_params': {
@@ -372,7 +451,10 @@ class BaseConfig(object):
             },
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule3'},
+            ],
         },
         APP_TYPE['ZUI_JING_QI']: {
             'rule_params': {
@@ -385,7 +467,12 @@ class BaseConfig(object):
                 'data2': [APP_TYPE['VLOG'], APP_TYPE['ZUI_JING_QI']],
                 'data3': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO'],
                           APP_TYPE['ZUI_JING_QI']],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule3'},
+                {'data': 'data2', 'rule': 'rule3'},
+                {'data': 'data3', 'rule': 'rule3'},
+            ],
         },
         APP_TYPE['APP']: {
             'rule_params': {
@@ -395,7 +482,10 @@ class BaseConfig(object):
             },
             'data_params': {
                 'data1': [APP_TYPE['VLOG'], ],
-            }
+            },
+            'params_list': [
+                {'data': 'data1', 'rule': 'rule3'},
+            ],
         },
     }
 

+ 96 - 46
region_rule_rank_h.py

@@ -346,23 +346,49 @@ def process_with_region(region, df_merged, app_type, data_key, rule_key, rule_pa
 
 def process_with_app_type(app_type, params, region_code_list, feature_df, now_date, now_h):
     log_.info(f"app_type = {app_type}")
+    data_params_item = params.get('data_params')
+    rule_params_item = params.get('rule_params')
     task_list = []
-    for data_key, data_param in params['data_params'].items():
+    for param in params.get('params_list'):
+        data_key = param.get('data')
+        data_param = data_params_item.get(data_key)
         log_.info(f"data_key = {data_key}, data_param = {data_param}")
         df_list = [feature_df[feature_df['apptype'] == apptype] for apptype in data_param]
         df_merged = reduce(merge_df, df_list)
-        for rule_key, rule_param in params['rule_params'].items():
-            log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
-            task_list.extend(
-                [
-                    gevent.spawn(process_with_region, region, df_merged, app_type, data_key, rule_key, rule_param,
-                                 now_date, now_h)
-                    for region in region_code_list
-                ]
-            )
+
+        rule_key = param.get('rule')
+        rule_param = rule_params_item.get(rule_key)
+        log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
+        task_list.extend(
+            [
+                gevent.spawn(process_with_region, region, df_merged, app_type, data_key, rule_key, rule_param,
+                             now_date, now_h)
+                for region in region_code_list
+            ]
+        )
     gevent.joinall(task_list)
 
 
+
+    #
+    # log_.info(f"app_type = {app_type}")
+    # task_list = []
+    # for data_key, data_param in params['data_params'].items():
+    #     log_.info(f"data_key = {data_key}, data_param = {data_param}")
+    #     df_list = [feature_df[feature_df['apptype'] == apptype] for apptype in data_param]
+    #     df_merged = reduce(merge_df, df_list)
+    #     for rule_key, rule_param in params['rule_params'].items():
+    #         log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
+    #         task_list.extend(
+    #             [
+    #                 gevent.spawn(process_with_region, region, df_merged, app_type, data_key, rule_key, rule_param,
+    #                              now_date, now_h)
+    #                 for region in region_code_list
+    #             ]
+    #         )
+    # gevent.joinall(task_list)
+
+
 def rank_by_h(project, table, now_date, now_h, rule_params, region_code_list):
     # 获取特征数据
     feature_df = get_feature_data(project=project, table=table, now_date=now_date)
@@ -373,6 +399,7 @@ def rank_by_h(project, table, now_date, now_h, rule_params, region_code_list):
     ]
     gevent.joinall(t)
 
+
     # for app_type, params in rule_params.items():
     #     log_.info(f"app_type = {app_type}")
     #     for data_key, data_param in params['data_params'].items():
@@ -421,46 +448,69 @@ def h_rank_bottom(now_date, now_h, rule_params, region_code_list):
         redis_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
         redis_h = now_h - 1
 
-    # key_prefix_list = [
-    #     config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H,
-    #     config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_DAY_H,
-    #     config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_DAY_H,
-    #     config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H
-    # ]
-    # fea_df = get_feature_data(project=project, table=table, now_date=now_date - datetime.timedelta(hours=1))
-    # region_list = list(set(fea_df[''].to_list()))
-
     # 以上一小时的地域分组数据作为当前小时的数据
     key_prefix = config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H
     for app_type, params in rule_params.items():
         log_.info(f"app_type = {app_type}")
-        for data_key, data_param in params['data_params'].items():
-            log_.info(f"data_key = {data_key}, data_param = {data_param}")
-            for rule_key, rule_param in params['rule_params'].items():
-                log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
-                region_24h_rule_key = rule_param.get('region_24h_rule_key', 'rule1')
-                for region in region_code_list:
-                    log_.info(f"region = {region}")
-                    key_name = f"{key_prefix}{region}.{app_type}.{data_key}.{rule_key}.{redis_dt}.{redis_h}"
-                    initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
-                    if initial_data is None:
-                        initial_data = []
-                    final_data = dict()
-                    h_video_ids = []
-                    for video_id, score in initial_data:
-                        final_data[video_id] = score
-                        h_video_ids.append(int(video_id))
-                    # 存入对应的redis
-                    final_key_name = \
-                        f"{key_prefix}{region}.{app_type}.{data_key}.{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
-                    if len(final_data) > 0:
-                        redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=23 * 3600)
-                    # 清空线上过滤应用列表
-                    redis_helper.del_keys(key_name=f"{config_.REGION_H_VIDEO_FILER}{region}.{app_type}.{data_key}.{rule_key}")
-                    # 与其他召回视频池去重,存入对应的redis
-                    dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key,
-                                 region_24h_rule_key=region_24h_rule_key, region=region,
-                                 app_type=app_type, data_key=data_key)
+        rule_params_item = params.get('rule_params')
+        for param in params.get('params_list'):
+            data_key = param.get('data')
+            rule_key = param.get('rule')
+            rule_param = rule_params_item.get(rule_key)
+            log_.info(f"data_key = {data_key}, rule_key = {rule_key}, rule_param = {rule_param}")
+            region_24h_rule_key = rule_param.get('region_24h_rule_key', 'rule1')
+            for region in region_code_list:
+                log_.info(f"region = {region}")
+                key_name = f"{key_prefix}{region}.{app_type}.{data_key}.{rule_key}.{redis_dt}.{redis_h}"
+                initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
+                if initial_data is None:
+                    initial_data = []
+                final_data = dict()
+                h_video_ids = []
+                for video_id, score in initial_data:
+                    final_data[video_id] = score
+                    h_video_ids.append(int(video_id))
+                # 存入对应的redis
+                final_key_name = \
+                    f"{key_prefix}{region}.{app_type}.{data_key}.{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
+                if len(final_data) > 0:
+                    redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=23 * 3600)
+                # 清空线上过滤应用列表
+                redis_helper.del_keys(
+                    key_name=f"{config_.REGION_H_VIDEO_FILER}{region}.{app_type}.{data_key}.{rule_key}")
+                # 与其他召回视频池去重,存入对应的redis
+                dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key,
+                             region_24h_rule_key=region_24h_rule_key, region=region,
+                             app_type=app_type, data_key=data_key)
+
+
+        # for data_key, data_param in params['data_params'].items():
+        #     log_.info(f"data_key = {data_key}, data_param = {data_param}")
+        #     for rule_key, rule_param in params['rule_params'].items():
+        #         log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
+        #         region_24h_rule_key = rule_param.get('region_24h_rule_key', 'rule1')
+        #         for region in region_code_list:
+        #             log_.info(f"region = {region}")
+        #             key_name = f"{key_prefix}{region}.{app_type}.{data_key}.{rule_key}.{redis_dt}.{redis_h}"
+        #             initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
+        #             if initial_data is None:
+        #                 initial_data = []
+        #             final_data = dict()
+        #             h_video_ids = []
+        #             for video_id, score in initial_data:
+        #                 final_data[video_id] = score
+        #                 h_video_ids.append(int(video_id))
+        #             # 存入对应的redis
+        #             final_key_name = \
+        #                 f"{key_prefix}{region}.{app_type}.{data_key}.{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
+        #             if len(final_data) > 0:
+        #                 redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=23 * 3600)
+        #             # 清空线上过滤应用列表
+        #             redis_helper.del_keys(key_name=f"{config_.REGION_H_VIDEO_FILER}{region}.{app_type}.{data_key}.{rule_key}")
+        #             # 与其他召回视频池去重,存入对应的redis
+        #             dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key,
+        #                          region_24h_rule_key=region_24h_rule_key, region=region,
+        #                          app_type=app_type, data_key=data_key)
 
 
 def h_timer_check():

+ 40 - 34
region_rule_rank_h_by24h.py

@@ -201,18 +201,24 @@ def process_with_region(region, df_merged, app_type, data_key, rule_key, rule_pa
 
 def process_with_app_type(app_type, params, region_code_list, feature_df, now_date, now_h):
     log_.info(f"app_type = {app_type}")
-    for data_key, data_param in params['data_params'].items():
+    data_params_item = params.get('data_params')
+    rule_params_item = params.get('rule_params')
+    for param in params.get('params_list'):
+        data_key = param.get('data')
+        data_param = data_params_item.get(data_key)
         log_.info(f"data_key = {data_key}, data_param = {data_param}")
         df_list = [feature_df[feature_df['apptype'] == apptype] for apptype in data_param]
         df_merged = reduce(merge_df, df_list)
-        for rule_key, rule_param in params['rule_params'].items():
-            log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
-            task_list = [
-                gevent.spawn(process_with_region, region, df_merged, app_type, data_key, rule_key, rule_param,
-                             now_date, now_h)
-                for region in region_code_list
-            ]
-            gevent.joinall(task_list)
+
+        rule_key = param.get('rule')
+        rule_param = rule_params_item.get(rule_key)
+        log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
+        task_list = [
+            gevent.spawn(process_with_region, region, df_merged, app_type, data_key, rule_key, rule_param,
+                         now_date, now_h)
+            for region in region_code_list
+        ]
+        gevent.joinall(task_list)
 
 
 def rank_by_24h(project, table, now_date, now_h, rule_params, region_code_list):
@@ -315,31 +321,31 @@ def h_rank_bottom(now_date, now_h, rule_params, region_code_list):
     key_prefix = config_.RECALL_KEY_NAME_PREFIX_REGION_BY_24H
     for app_type, params in rule_params.items():
         log_.info(f"app_type = {app_type}")
-        for data_key, data_param in params['data_params'].items():
-            log_.info(f"data_key = {data_key}, data_param = {data_param}")
-            for rule_key, rule_param in params['rule_params'].items():
-                log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
-                for region in region_code_list:
-                    log_.info(f"region = {region}")
-                    key_name = f"{key_prefix}{region}.{app_type}.{data_key}.{rule_key}.{redis_dt}.{redis_h}"
-                    initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
-                    if initial_data is None:
-                        initial_data = []
-                    final_data = dict()
-                    h_video_ids = []
-                    for video_id, score in initial_data:
-                        final_data[video_id] = score
-                        h_video_ids.append(int(video_id))
-                    # 存入对应的redis
-                    final_key_name = \
-                        f"{key_prefix}{region}.{app_type}.{data_key}.{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
-                    if len(final_data) > 0:
-                        redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=23 * 3600)
-                    # 清空线上过滤应用列表
-                    redis_helper.del_keys(key_name=f"{config_.REGION_H_VIDEO_FILER_24H}{region}.{app_type}.{data_key}.{rule_key}")
-
-                    # 与其他召回视频池去重,存入对应的redis
-                    # dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key, region=region)
+        for param in params.get('params_list'):
+            data_key = param.get('data')
+            rule_key = param.get('rule')
+            log_.info(f"data_key = {data_key}, rule_key = {rule_key}")
+            for region in region_code_list:
+                log_.info(f"region = {region}")
+                key_name = f"{key_prefix}{region}.{app_type}.{data_key}.{rule_key}.{redis_dt}.{redis_h}"
+                initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
+                if initial_data is None:
+                    initial_data = []
+                final_data = dict()
+                h_video_ids = []
+                for video_id, score in initial_data:
+                    final_data[video_id] = score
+                    h_video_ids.append(int(video_id))
+                # 存入对应的redis
+                final_key_name = \
+                    f"{key_prefix}{region}.{app_type}.{data_key}.{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
+                if len(final_data) > 0:
+                    redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=23 * 3600)
+                # 清空线上过滤应用列表
+                redis_helper.del_keys(key_name=f"{config_.REGION_H_VIDEO_FILER_24H}{region}.{app_type}.{data_key}.{rule_key}")
+
+                # 与其他召回视频池去重,存入对应的redis
+                # dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key, region=region)
 
 
 def h_timer_check():

+ 37 - 40
rule_rank_h_by_24h.py

@@ -214,31 +214,27 @@ def rank_by_h(now_date, now_h, rule_params, project, table):
     # rank
     for app_type, params in rule_params.items():
         log_.info(f"app_type = {app_type}")
-        for data_key, data_param in params['data_params'].items():
+        data_params_item = params.get('data_params')
+        rule_params_item = params.get('rule_params')
+        for param in params.get('params_list'):
+            data_key = param.get('data')
+            data_param = data_params_item.get(data_key)
             log_.info(f"data_key = {data_key}, data_param = {data_param}")
             df_list = [feature_df[feature_df['apptype'] == apptype] for apptype in data_param]
             df_merged = reduce(merge_df, df_list)
-            for rule_key, rule_param in params['rule_params'].items():
-                log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
-                # 计算score
-                cal_score_func = rule_param.get('cal_score_func', 1)
-                if cal_score_func == 2:
-                    score_df = cal_score2(df=df_merged, param=rule_param)
-                else:
-                    score_df = cal_score1(df=df_merged)
-                video_rank_h(df=score_df, now_date=now_date, now_h=now_h, rule_key=rule_key, param=rule_param,
-                             app_type=app_type, data_key=data_key)
-
-
-    # for key, value in rule_params.items():
-    #     log_.info(f"rule = {key}, param = {value}")
-    #     # 计算score
-    #     cal_score_func = value.get('cal_score_func', 1)
-    #     if cal_score_func == 2:
-    #         score_df = cal_score2(df=feature_df, param=value)
-    #     else:
-    #         score_df = cal_score1(df=feature_df)
-    #     video_rank_h(df=score_df, now_date=now_date, now_h=now_h, rule_key=key, param=value)
+
+            rule_key = param.get('rule')
+            rule_param = rule_params_item.get(rule_key)
+            log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
+            # 计算score
+            cal_score_func = rule_param.get('cal_score_func', 1)
+            if cal_score_func == 2:
+                score_df = cal_score2(df=df_merged, param=rule_param)
+            else:
+                score_df = cal_score1(df=df_merged)
+            video_rank_h(df=score_df, now_date=now_date, now_h=now_h, rule_key=rule_key, param=rule_param,
+                         app_type=app_type, data_key=data_key)
+
     #     # to-csv
     #     score_filename = f"score_by24h_{key}_{datetime.strftime(now_date, '%Y%m%d%H')}.csv"
     #     score_df.to_csv(f'./data/{score_filename}')
@@ -262,24 +258,25 @@ def h_rank_bottom(now_date, now_h, rule_params):
     key_prefix_list = [config_.RECALL_KEY_NAME_PREFIX_BY_24H, config_.RECALL_KEY_NAME_PREFIX_DUP_24H]
     for app_type, params in rule_params.items():
         log_.info(f"app_type = {app_type}")
-        for data_key, data_param in params['data_params'].items():
-            log_.info(f"data_key = {data_key}, data_param = {data_param}")
-            for rule_key, rule_param in params['rule_params'].items():
-                for key_prefix in key_prefix_list:
-                    key_name = f"{key_prefix}{app_type}.{data_key}.{rule_key}.{redis_dt}.{redis_h}"
-                    initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
-                    if initial_data is None:
-                        initial_data = []
-                    final_data = dict()
-                    for video_id, score in initial_data:
-                        final_data[video_id] = score
-                    # 存入对应的redis
-                    final_key_name = \
-                        f"{key_prefix}{app_type}.{data_key}.{rule_key}.{datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
-                    if len(final_data) > 0:
-                        redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=23 * 3600)
-                    # 清空线上过滤应用列表
-                    redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER_24H}{app_type}.{data_key}.{rule_key}")
+        for param in params.get('params_list'):
+            data_key = param.get('data')
+            rule_key = param.get('rule')
+            log_.info(f"data_key = {data_key}, rule_key = {rule_key}")
+            for key_prefix in key_prefix_list:
+                key_name = f"{key_prefix}{app_type}.{data_key}.{rule_key}.{redis_dt}.{redis_h}"
+                initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
+                if initial_data is None:
+                    initial_data = []
+                final_data = dict()
+                for video_id, score in initial_data:
+                    final_data[video_id] = score
+                # 存入对应的redis
+                final_key_name = \
+                    f"{key_prefix}{app_type}.{data_key}.{rule_key}.{datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
+                if len(final_data) > 0:
+                    redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=23 * 3600)
+                # 清空线上过滤应用列表
+                redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER_24H}{app_type}.{data_key}.{rule_key}")
 
 
 def h_timer_check():

+ 22 - 12
videos_filter.py

@@ -636,16 +636,26 @@ def filter_region_videos():
     task_list = []
     for app_type, params in rule_params.items():
         log_.info(f"app_type = {app_type}")
-        for data_key, data_param in params['data_params'].items():
-            log_.info(f"data_key = {data_key}, data_param = {data_param}")
-            for rule_key, rule_param in params['rule_params'].items():
-                log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
-                task_list.extend(
-                    [
-                        gevent.spawn(filter_process_with_region, app_type, data_key, rule_key, region, now_date, now_h)
-                        for region in region_code_list
-                    ]
-                )
+        for param in params.get('params_list'):
+            data_key = param.get('data')
+            rule_key = param.get('rule')
+            log_.info(f"data_key = {data_key}, rule_key = {rule_key}")
+            task_list.extend(
+                [
+                    gevent.spawn(filter_process_with_region, app_type, data_key, rule_key, region, now_date, now_h)
+                    for region in region_code_list
+                ]
+            )
+        # for data_key, data_param in params['data_params'].items():
+        #     log_.info(f"data_key = {data_key}, data_param = {data_param}")
+        #     for rule_key, rule_param in params['rule_params'].items():
+        #         log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
+        #         task_list.extend(
+        #             [
+        #                 gevent.spawn(filter_process_with_region, app_type, data_key, rule_key, region, now_date, now_h)
+        #                 for region in region_code_list
+        #             ]
+        #         )
     gevent.joinall(task_list)
     log_.info("region_h videos filter end!")
 
@@ -886,9 +896,9 @@ def main():
         # 按位置排序视频过滤
         # filter_position_videos()
         # 过滤票圈视频APP小时级数据
-        filter_app_pool()
+        # filter_app_pool()
         # 过滤小程序小时级数据
-        filter_rov_h()
+        # filter_rov_h()
         # 过滤小程序天级数据
         # filter_rov_day()
         # 过滤老视频数据