Browse Source

Merge branch 'redis-opt-20220810' into pre-master

liqian 2 years ago
parent
commit
cece01ecac
7 changed files with 413 additions and 298 deletions
  1. +16 -30
      check_video_limit_distribute.py
  2. +127 -84
      config.py
  3. +24 -35
      redis_data_monitor.py
  4. +88 -92
      region_rule_rank_h.py
  5. +64 -8
      region_rule_rank_h_by24h.py
  6. +54 -18
      rule_rank_h_by_24h.py
  7. +40 -31
      videos_filter.py

+ 16 - 30
check_video_limit_distribute.py

@@ -94,8 +94,8 @@ def check_videos_distribute():
     return stop_distribute_video_id_list
 
 
-def process_with_region(app_type, data_key, rule_key, region, stop_distribute_video_id_list, now_date, now_h):
-    log_.info(f"app_type = {app_type}, data_key = {data_key}, rule_key = {rule_key}, region = {region}")
+def process_with_region(data_key, rule_key, region, stop_distribute_video_id_list, now_date, now_h):
+    log_.info(f"data_key = {data_key}, rule_key = {rule_key}, region = {region}")
     # 将已超分发视频加入到地域小时级线上过滤应用列表中
     # redis_helper.add_data_with_set(
     #     key_name=f"{config_.REGION_H_VIDEO_FILER}{region}.{app_type}.{data_key}.{rule_key}",
@@ -140,7 +140,7 @@ def process_with_region(app_type, data_key, rule_key, region, stop_distribute_vi
         ]
 
     for key_prefix in key_prefix_list:
-        key_name = f"{key_prefix}{region}:{app_type}:{data_key}:{rule_key}:" \
+        key_name = f"{key_prefix}{region}:{data_key}:{rule_key}:" \
                    f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
         if not redis_helper.key_exists(key_name=key_name):
             if now_h == 0:
@@ -149,7 +149,7 @@ def process_with_region(app_type, data_key, rule_key, region, stop_distribute_vi
             else:
                 redis_date = now_date
                 redis_h = now_h - 1
-            key_name = f"{key_prefix}{region}:{app_type}:{data_key}:{rule_key}:" \
+            key_name = f"{key_prefix}{region}:{data_key}:{rule_key}:" \
                        f"{datetime.datetime.strftime(redis_date, '%Y%m%d')}:{redis_h}"
         redis_helper.remove_value_from_zset(key_name=key_name, value=stop_distribute_video_id_list)
 
@@ -188,7 +188,7 @@ def process_with_region(app_type, data_key, rule_key, region, stop_distribute_vi
     redis_helper.remove_value_from_zset(key_name=key_name, value=stop_distribute_video_id_list)
     """
 
-    log_.info(f"app_type = {app_type}, data_key = {data_key}, rule_key = {rule_key}, region = {region} "
+    log_.info(f"data_key = {data_key}, rule_key = {rule_key}, region = {region} "
               f"videos check end!")
 
 
@@ -209,31 +209,17 @@ def check_region_videos(rule_params):
 
     # 对已超分发的视频进行移除
     region_code_list = [code for region, code in config_.REGION_CODE.items()]
-    # rule_params = config_.RULE_PARAMS_REGION_APP_TYPE
-
-    for app_type, params in rule_params.items():
-        log_.info(f"app_type = {app_type}")
-        for param in params.get('params_list'):
-            data_key = param.get('data')
-            rule_key = param.get('rule')
-            log_.info(f"data_key = {data_key}, rule_key = {rule_key}")
-            task_list = [
-                gevent.spawn(process_with_region, app_type, data_key, rule_key, region,
-                             stop_distribute_video_id_list, now_date, now_h)
-                for region in region_code_list
-            ]
-            gevent.joinall(task_list)
-
-        # for data_key, data_param in params['data_params'].items():
-        #     log_.info(f"data_key = {data_key}, data_param = {data_param}")
-        #     for rule_key, rule_param in params['rule_params'].items():
-        #         log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
-        #         task_list = [
-        #             gevent.spawn(process_with_region, app_type, data_key, rule_key, region,
-        #                          stop_distribute_video_id_list, now_date, now_h)
-        #             for region in region_code_list
-        #         ]
-        #         gevent.joinall(task_list)
+
+    for param in rule_params.get('params_list'):
+        data_key = param.get('data')
+        rule_key = param.get('rule')
+        log_.info(f"data_key = {data_key}, rule_key = {rule_key}")
+        task_list = [
+            gevent.spawn(process_with_region,
+                         data_key, rule_key, region, stop_distribute_video_id_list, now_date, now_h)
+            for region in region_code_list
+        ]
+        gevent.joinall(task_list)
 
     # 将已超分发视频 移除 原始大列表
     key_name = f"{config_.RECALL_KEY_NAME_PREFIX}{datetime.datetime.strftime(now_date, '%Y%m%d')}"

+ 127 - 84
config.py

@@ -105,16 +105,6 @@ class BaseConfig(object):
         'rule2': {'cal_score_func': 2, 'return_count': 100, 'platform_return_rate': 0.001},
     }
 
-    # 小时级更新过去24h数据
-    PROJECT_24H = 'loghubods'
-    TABLE_24H = 'video_data_each_hour_dataset_24h_total'
-
-    # 小时级更新过去24h数据规则参数
-    RULE_PARAMS_24H = {
-        # 'rule1': {'cal_score_func': 2, 'return_count': 100, 'platform_return_rate': 0.001},
-        'rule2': {'cal_score_func': 2, 'return_count': 100, 'platform_return_rate': 0.001, 'view_type': 'preview'},
-    }
-
     REGION_CODE = {
         '北京': '110000', '天津': '120000', '河北省': '130000', '山西省': '140000', '内蒙古': '150000',
         '辽宁省': '210000', '吉林省': '220000', '黑龙江省': '230000',
@@ -126,16 +116,6 @@ class BaseConfig(object):
         'None': '-1'
     }
 
-    # 地域分组小时级规则更新使用数据
-    PROJECT_REGION = 'loghubods'
-    TABLE_REGION = 'video_each_hour_update_province'
-
-    # 地域分组小时级规则参数
-    RULE_PARAMS_REGION = {
-        # 'rule1': {'view_type': 'pre-view', 'platform_return_rate': 0.001, 'region_24h_rule_key': 'rule1'},
-        'rule2': {'view_type': 'video-show', 'platform_return_rate': 0.001, 'region_24h_rule_key': 'rule2'},
-        'rule3': {'view_type': 'video-show-region', 'platform_return_rate': 0.001, 'region_24h_rule_key': 'rule2'},
-    }
 
     # 地域分组天级规则更新使用数据
     PROJECT_REGION_DAY = 'loghubods'
@@ -146,35 +126,33 @@ class BaseConfig(object):
         'rule1': {'view_type': 'pre-view', 'return_count': 21, 'score_rule': 0},
     }
 
-    # 地域分组小时级更新24h使用数据
-    PROJECT_REGION_24H = 'loghubods'
-    TABLE_REGION_24H = 'video_each_day_update_province_24h_total'
-
-    # 地域分组小时级更新24h规则参数
-    RULE_PARAMS_REGION_24H = {
-        # 'rule1': {'view_type': 'pre-view', 'return_count': 21, 'score_rule': 0, 'platform_return_rate': 0.001},
-        'rule2': {'view_type': 'video-show', 'return_count': 21, 'score_rule': 0, 'platform_return_rate': 0.001},
+    # ##### 区分appType数据
+    DATA_PARAMS = {
+        'data1': [APP_TYPE['VLOG'], ],  # vlog
+        'data2': [APP_TYPE['VLOG'], APP_TYPE['LONG_VIDEO'], ],  # [vlog, 内容精选]
+        'data3': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], ],  # [vlog, 票圈视频]
+        'data4': [APP_TYPE['VLOG'], APP_TYPE['SHORT_VIDEO'], ],  # [vlog, 票圈短视频]
+        'data5': [APP_TYPE['VLOG'], APP_TYPE['ZUI_JING_QI']],  # [vlog, 最惊奇]
+        'data6': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO']],
+        'data7': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO'],
+                  APP_TYPE['APP']],
     }
 
-    # ##### 区分appType数据
     # 小时级更新过去48h数据 loghubods.video_data_each_hour_dataset_48h_total_apptype
     PROJECT_48H_APP_TYPE = 'loghubods'
     TABLE_48H_APP_TYPE = 'video_data_each_hour_dataset_48h_total_apptype'
 
     # 小时级更新过去48h数据规则参数
     RULE_PARAMS_48H_APP_TYPE = {
-        APP_TYPE['VLOG']: {
-            'rule_params': {
-                'rule1': {'cal_score_func': 2, 'return_count': 100, 'platform_return_rate': 0.001,
-                          'view_type': 'preview'},
-            },
-            'data_params': {
-                'data1': [APP_TYPE['VLOG'], ],
-            },
-            'params_list': [
-                {'data': 'data1', 'rule': 'rule1'},
-            ],
+        'rule_params': {
+            'rule1': {'cal_score_func': 2, 'return_count': 100, 'platform_return_rate': 0.001, 'view_type': 'preview'},
+        },
+        'data_params': {
+            'data1': [APP_TYPE['VLOG'], ],
         },
+        'params_list': [
+            {'data': 'data1', 'rule': 'rule1'},
+        ],
     }
 
     # 小时级更新过去24h数据 loghubods.video_data_each_hour_dataset_24h_total_apptype
@@ -182,6 +160,77 @@ class BaseConfig(object):
     TABLE_24H_APP_TYPE = 'video_data_each_hour_dataset_24h_total_apptype'
 
     # 小时级更新过去24h数据规则参数
+    RULE_PARAMS_24H_APP_TYPE = {
+        'rule_params': {
+            'rule2': {'cal_score_func': 2, 'return_count': 40, 'platform_return_rate': 0.001,
+                      'view_type': 'preview'},
+            'rule3': {'cal_score_func': 2, 'return_count': 100, 'platform_return_rate': 0.001,
+                      'view_type': 'preview'},
+        },
+        'data_params': DATA_PARAMS,
+        'params_list': [
+            {'data': 'data1', 'rule': 'rule2'},
+            {'data': 'data1', 'rule': 'rule3'},
+            {'data': 'data2', 'rule': 'rule2'},
+            {'data': 'data3', 'rule': 'rule2'},
+            {'data': 'data4', 'rule': 'rule2'},
+            {'data': 'data7', 'rule': 'rule2'},
+            {'data': 'data6', 'rule': 'rule2'},
+        ]
+    }
+
+    # 地域分组小时级更新24h使用数据  loghubods.video_each_day_update_province_24h_total_apptype
+    PROJECT_REGION_24H_APP_TYPE = 'loghubods'
+    TABLE_REGION_24H_APP_TYPE = 'video_each_day_update_province_24h_total_apptype'
+
+    # 地域分组小时级更新24h规则参数
+    RULE_PARAMS_REGION_24H_APP_TYPE = {
+        'rule_params': {
+            'rule2': {'view_type': 'video-show', 'return_count': 21, 'score_rule': 0,
+                      'platform_return_rate': 0.001},
+            'rule3': {'view_type': 'preview', 'return_count': 21, 'score_rule': 0,
+                      'platform_return_rate': 0.001},
+        },
+        'data_params': DATA_PARAMS,
+        'params_list': [
+            {'data': 'data1', 'rule': 'rule2'},
+            {'data': 'data2', 'rule': 'rule2'},
+            {'data': 'data3', 'rule': 'rule2'},
+            {'data': 'data4', 'rule': 'rule2'},
+            {'data': 'data6', 'rule': 'rule2'},
+            {'data': 'data7', 'rule': 'rule3'},
+        ]
+    }
+
+    # 地域分组小时级规则更新使用数据
+    PROJECT_REGION_APP_TYPE = 'loghubods'
+    TABLE_REGION_APP_TYPE = 'video_each_hour_update_province_apptype'
+
+    # 地域分组小时级规则参数
+    RULE_PARAMS_REGION_APP_TYPE = {
+        'rule_params': {
+            # 'rule2': {'view_type': 'video-show', 'platform_return_rate': 0.001, 'region_24h_rule_key': 'rule2'},
+            'rule3': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule2'},
+            'rule4': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3'},
+            'rule6': {'view_type': 'preview', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule3', '24h_rule_key': 'rule2'},
+        },
+        'data_params': DATA_PARAMS,
+        'params_list': [
+            {'data': 'data1', 'rule': 'rule3'},
+            {'data': 'data1', 'rule': 'rule4'},
+            {'data': 'data2', 'rule': 'rule3'},
+            {'data': 'data3', 'rule': 'rule3'},
+            {'data': 'data4', 'rule': 'rule3'},
+            {'data': 'data6', 'rule': 'rule3'},
+            {'data': 'data7', 'rule': 'rule6'},
+        ],
+    }
+
+
+    """
     RULE_PARAMS_24H_APP_TYPE = {
         APP_TYPE['VLOG']: {
             'rule_params': {
@@ -297,11 +346,8 @@ class BaseConfig(object):
         },
     }
 
-    # 地域分组小时级更新24h使用数据  loghubods.video_each_day_update_province_24h_total_apptype
-    PROJECT_REGION_24H_APP_TYPE = 'loghubods'
-    TABLE_REGION_24H_APP_TYPE = 'video_each_day_update_province_24h_total_apptype'
+    
 
-    # 地域分组小时级更新24h规则参数
     RULE_PARAMS_REGION_24H_APP_TYPE = {
         APP_TYPE['VLOG']: {
             'rule_params': {
@@ -416,11 +462,8 @@ class BaseConfig(object):
         },
     }
 
-    # 地域分组小时级规则更新使用数据
-    PROJECT_REGION_APP_TYPE = 'loghubods'
-    TABLE_REGION_APP_TYPE = 'video_each_hour_update_province_apptype'
-
-    # 地域分组小时级规则参数
+    
+    
     RULE_PARAMS_REGION_APP_TYPE = {
         APP_TYPE['VLOG']: {
             'rule_params': {
@@ -543,21 +586,21 @@ class BaseConfig(object):
             ],
         },
     }
+    """
+
 
     # 不区分地域数据使用相对48h数据
     RULE_PARAMS_REGION_APP_TYPE_48H = {
-        APP_TYPE['VLOG']: {
-            'rule_params': {
-                'rule5': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
-                          'region_24h_rule_key': 'rule2', '48h_rule_key': 'rule1'},
-            },
-            'data_params': {
-                'data1': [APP_TYPE['VLOG'], ],
-            },
-            'params_list': [
-                {'data': 'data1', 'rule': 'rule5'},
-            ],
+        'rule_params': {
+            'rule5': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule2', '48h_rule_key': 'rule1'},
+        },
+        'data_params': {
+            'data1': [APP_TYPE['VLOG'], ],
         },
+        'params_list': [
+            {'data': 'data1', 'rule': 'rule5'},
+        ],
     }
 
     # 老视频更新使用数据
@@ -595,18 +638,18 @@ class BaseConfig(object):
     RECALL_KEY_NAME_PREFIX_DUP_DAY_PRE = 'com.weiqu.video.recall.hot.item.score.dup.day.pre.'
 
     # 小程序小时级48h数据更新结果存放 redis key前缀,
-    # 完整格式:recall:item:score:apptype:48h:{appType}:{data_key}:{rule_key}:{date}:{h}
-    RECALL_KEY_NAME_PREFIX_BY_48H = 'recall:item:score:apptype:48h:'
+    # 完整格式:recall:item:score:48h:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_BY_48H = 'recall:item:score:48h:'
     # 小程序小时级48h数据 筛选后的剩余数据 更新结果存放 redis key前缀,
-    # 完整格式:recall:item:score:apptype:48h:other:{appType}:{data_key}:{rule_key}:{date}:{h}
-    RECALL_KEY_NAME_PREFIX_BY_48H_OTHER = 'recall:item:score:apptype:48h:other:'
+    # 完整格式:recall:item:score:48h:other:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_BY_48H_OTHER = 'recall:item:score:48h:other:'
 
     # 小程序小时级24h数据更新结果存放 redis key前缀,
-    # 完整格式:recall:item:score:apptype:24h:{appType}:{data_key}:{rule_key}:{date}:{h}
-    RECALL_KEY_NAME_PREFIX_BY_24H = 'recall:item:score:apptype:24h:'
+    # 完整格式:recall:item:score:24h:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_BY_24H = 'recall:item:score:24h:'
     # 小程序小时级24h数据 筛选后的剩余数据 更新结果存放 redis key前缀,
-    # 完整格式:recall:item:score:apptype:24h:other:{appType}:{data_key}:{rule_key}:{date}:{h}
-    RECALL_KEY_NAME_PREFIX_BY_24H_OTHER = 'recall:item:score:apptype:24h:other:'
+    # 完整格式:recall:item:score:24h:other:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_BY_24H_OTHER = 'recall:item:score:24h:other:'
     # 小程序离线ROV模型结果与小程序小时级24h更新结果去重后 存放 redis key前缀,
     # 完整格式:com.weiqu.video.recall.hot.item.score.dup.24h.{rule_key}.{date}.{h}
     RECALL_KEY_NAME_PREFIX_DUP_24H = 'com.weiqu.video.recall.hot.item.score.dup.24h.'
@@ -614,32 +657,32 @@ class BaseConfig(object):
     H_VIDEO_FILER_24H = 'com.weiqu.video.filter.apptype.h.item.24h.'
 
     # 小程序地域分组小时级更新结果存放 redis key前缀,
-    # 完整格式:recall:item:score:apptype:region:h:{region}:{appType}:{data_key}:{rule_key}:{date}:{h}
-    RECALL_KEY_NAME_PREFIX_REGION_BY_H = 'recall:item:score:apptype:region:h:'
+    # 完整格式:recall:item:score:region:h:{region}:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_REGION_BY_H = 'recall:item:score:region:h:'
     # 小程序地域分组天级更新结果与小程序地域分组小时级更新结果去重后 存放 redis key前缀,
     # 完整格式:com.weiqu.video.recall.hot.item.score.dup1.region.day.h.{region}.{rule_key}.{date}.{h}
     RECALL_KEY_NAME_PREFIX_DUP1_REGION_DAY_H = 'com.weiqu.video.recall.hot.item.score.dup1.region.day.h.'
     # 小程序地域分组小时级更新24h结果与小程序地域分组小时级更新结果去重后 存放 redis key前缀,
-    # 完整格式:recall:item:score:apptype:region:dup1:region24h:{region}:{appType}:{data_key}:{rule_key}:{date}:{h}
-    RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H = 'recall:item:score:apptype:region:dup1:region24h:'
+    # 完整格式:recall:item:score:region:dup1:region24h:{region}:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H = 'recall:item:score:region:dup1:region24h:'
     # 小程序天级更新结果与 小程序地域分组天级更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,
     # 完整格式:com.weiqu.video.recall.hot.item.score.dup2.region.day.h.{region}.{rule_key}.{date}.{h}
     RECALL_KEY_NAME_PREFIX_DUP2_REGION_DAY_H = 'com.weiqu.video.recall.hot.item.score.dup2.region.day.h.'
     # 小程序24h更新结果与 小程序地域分组24h更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,
-    # 完整格式:recall:item:score:apptype:region:dup2:24h:{region}:{appType}:{data_key}:{rule_key}:{date}:{h}
-    RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H = 'recall:item:score:apptype:region:dup2:24h:'
+    # 完整格式:recall:item:score:region:dup2:24h:{region}:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H = 'recall:item:score:region:dup2:24h:'
     # 小程序小时级24h数据 筛选后的剩余数据 更新结果 与 小程序24h更新结果/小程序地域分组24h更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,
-    # 完整格式:recall:item:score:apptype:region:dup3:24h:{region}:{appType}:{data_key}:{rule_key}:{date}:{h}
-    RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H = 'recall:item:score:apptype:region:dup3:24h:'
+    # 完整格式:recall:item:score:region:dup3:24h:{region}:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H = 'recall:item:score:region:dup3:24h:'
     # 小程序48h更新结果与 小程序地域分组24h更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,
-    # 完整格式:recall:item:score:apptype:region:dup2:48h:{region}:{appType}:{data_key}:{rule_key}:{date}:{h}
-    RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H = 'recall:item:score:apptype:region:dup2:48h:'
+    # 完整格式:recall:item:score:region:dup2:48h:{region}:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H = 'recall:item:score:region:dup2:48h:'
     # 小程序小时级48h数据 筛选后的剩余数据 更新结果 与 小程序48h更新结果/小程序地域分组24h更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,
-    # 完整格式:recall:item:score:apptype:region:dup3:48h:{region}:{appType}:{data_key}:{rule_key}:{date}:{h}
-    RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H = 'recall:item:score:apptype:region:dup3:48h:'
+    # 完整格式:recall:item:score:region:dup3:48h:{region}:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H = 'recall:item:score:region:dup3:48h:'
     # 小程序离线ROV模型结果与 小程序天级更新结果/小程序地域分组天级更新结果/小程序地域分组小时级更新结果 去重后 存放 redis key前缀,
-    # 完整格式:recall:item:score:apptype:region:dup4:rov:{region}:{appType}:{data_key}:{rule_key}:{date}:{h}
-    RECALL_KEY_NAME_PREFIX_DUP_REGION_H = 'recall:item:score:apptype:region:dup4:rov:'
+    # 完整格式:recall:item:score:region:dup4:rov:{region}:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_DUP_REGION_H = 'recall:item:score:region:dup4:rov:'
 
     # 地域分组小时级视频状态不符合推荐要求的列表 redis key,
     # 完整格式:com.weiqu.video.filter.apptype.region.h.item.{region}.{appType}.{data_key}.{rule_key}
@@ -651,8 +694,8 @@ class BaseConfig(object):
     RECALL_KEY_NAME_PREFIX_REGION_BY_DAY = 'com.weiqu.video.recall.item.score.region.day.'
 
     # 小程序地域分组小时级更新24h结果存放 redis key前缀,
-    # 完整格式:recall:item:score:apptype:region:24h:{region}:{appType}:{data_key}:{rule_key}:{date}:{h}
-    RECALL_KEY_NAME_PREFIX_REGION_BY_24H = 'recall:item:score:apptype:region:24h:'
+    # 完整格式:recall:item:score:region:24h:{region}:{data_key}:{rule_key}:{date}:{h}
+    RECALL_KEY_NAME_PREFIX_REGION_BY_24H = 'recall:item:score:region:24h:'
     # 小程序天级更新结果与 小程序地域分组小时级更新24h结果 去重后 存放 redis key前缀,
     # 完整格式:com.weiqu.video.recall.hot.item.score.dup.region.day.24h.{region}.{rule_key}.{date}.{h}
     RECALL_KEY_NAME_PREFIX_DUP_REGION_DAY_24H = 'com.weiqu.video.recall.hot.item.score.dup.region.day.24h.'

+ 24 - 35
redis_data_monitor.py

@@ -31,42 +31,31 @@ def rov_data_monitor(now_date, now_h):
 
 def region_data_monitor(now_date, now_h, rule_params, key_prefix_dict):
     """地域分组数据"""
-    # 地域分组小时级列表
-    # rule_params = config_.RULE_PARAMS_REGION_APP_TYPE
-    # key_prefix_dict = {
-    #     '地域分组小时级数据': config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H,
-    #     '地域分组相对24h去重后数据': config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H,
-    #     '不区分地域相对24h去重后数据': config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H,
-    #     '不区分地域相对24h筛选后剩余去重后数据': config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H,
-    #     'rov模型预测列表去重后数据': config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H,
-    # }
     region_code_list = [code for region, code in region_code.items()]
-    for app_type, params in rule_params.items():
-        for param in params.get('params_list'):
-            data_key = param.get('data')
-            rule_key = param.get('rule')
-            for key_con, key_prefix in key_prefix_dict.items():
-                if key_con == '不区分地域相对24h筛选后剩余去重后数据' and rule_key != 'rule4':
-                    continue
-                no_update_region_list = []
-                for region in region_code_list:
-                    region_key_name = f"{key_prefix}{region}:{app_type}:{data_key}:{rule_key}:{now_date}:{now_h}"
-                    if not redis_helper.key_exists(key_name=region_key_name):
-                        no_update_region_list.append(region)
-                if len(no_update_region_list) == len(region_code_list):
-                    msg_text = f"\n- 所属项目: rov-server/rov-offline" \
-                               f"\n- 告警名称: 离线更新数据未按时更新告警" \
-                               f"\n- 所属环境: {config_.ENV_TEXT}" \
-                               f"\n- appType: {app_type}" \
-                               f"\n- now_date: {now_date}" \
-                               f"\n- now_h: {now_h}" \
-                               f"\n- 告警描述: {key_con}未按时更新, data_key={data_key}, rule_key={rule_key}"
-                    log_.info(f"msg_text = {msg_text}")
-                    send_msg_to_feishu(
-                        webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
-                        key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
-                        msg_text=msg_text
-                    )
+    for param in rule_params.get('params_list'):
+        data_key = param.get('data')
+        rule_key = param.get('rule')
+        for key_con, key_prefix in key_prefix_dict.items():
+            if key_con == '不区分地域相对24h筛选后剩余去重后数据' and rule_key != 'rule4':
+                continue
+            no_update_region_list = []
+            for region in region_code_list:
+                region_key_name = f"{key_prefix}{region}:{data_key}:{rule_key}:{now_date}:{now_h}"
+                if not redis_helper.key_exists(key_name=region_key_name):
+                    no_update_region_list.append(region)
+            if len(no_update_region_list) == len(region_code_list):
+                msg_text = f"\n- 所属项目: rov-server/rov-offline" \
+                           f"\n- 告警名称: 离线更新数据未按时更新告警" \
+                           f"\n- 所属环境: {config_.ENV_TEXT}" \
+                           f"\n- now_date: {now_date}" \
+                           f"\n- now_h: {now_h}" \
+                           f"\n- 告警描述: {key_con}未按时更新, data_key={data_key}, rule_key={rule_key}"
+                log_.info(f"msg_text = {msg_text}")
+                send_msg_to_feishu(
+                    webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+                    key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+                    msg_text=msg_text
+                )
 
 
 def special_videos_monitor(now_date, now_h):

+ 88 - 92
region_rule_rank_h.py

@@ -137,7 +137,7 @@ def cal_score(df, param):
     return df
 
 
-def video_rank(df, now_date, now_h, rule_key, param, region, app_type, data_key, rule_rank_h_flag):
+def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank_h_flag):
     """
     获取符合进入召回源条件的视频,与每日更新的rov模型结果视频列表进行合并
     :param df:
@@ -182,7 +182,7 @@ def video_rank(df, now_date, now_h, rule_key, param, region, app_type, data_key,
         h_recall_result[int(video_id)] = float(score)
         h_video_ids.append(int(video_id))
     h_recall_key_name = \
-        f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H}{region}:{app_type}:{data_key}:{rule_key}:" \
+        f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H}{region}:{data_key}:{rule_key}:" \
         f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
     if len(h_recall_result) > 0:
         redis_helper.add_data_with_zset(key_name=h_recall_key_name, data=h_recall_result, expire_time=23 * 3600)
@@ -197,7 +197,7 @@ def video_rank(df, now_date, now_h, rule_key, param, region, app_type, data_key,
     # 与其他召回视频池去重,存入对应的redis
     dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key,
                  region_24h_rule_key=region_24h_rule_key, by_24h_rule_key=by_24h_rule_key, by_48h_rule_key=by_48h_rule_key,
-                 region=region, app_type=app_type, data_key=data_key, rule_rank_h_flag=rule_rank_h_flag)
+                 region=region, data_key=data_key, rule_rank_h_flag=rule_rank_h_flag)
 
 
 def dup_data(h_video_ids, initial_key_name, dup_key_name, region):
@@ -223,14 +223,14 @@ def dup_data(h_video_ids, initial_key_name, dup_key_name, region):
     return h_video_ids
 
 
-def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by_24h_rule_key, by_48h_rule_key, region, app_type, data_key, rule_rank_h_flag):
+def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by_24h_rule_key, by_48h_rule_key, region, data_key, rule_rank_h_flag):
     """将地域分组小时级数据与其他召回视频池去重,存入对应的redis"""
     # ##### 去重更新地域分组小时级24h列表,并另存为redis中
     region_24h_key_name = \
-        f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_24H}{region}:{app_type}:{data_key}:{region_24h_rule_key}:" \
+        f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_24H}{region}:{data_key}:{region_24h_rule_key}:" \
         f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
     region_24h_dup_key_name = \
-        f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H}{region}:{app_type}:{data_key}:{rule_key}:" \
+        f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
         f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
     h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=region_24h_key_name,
                            dup_key_name=region_24h_dup_key_name, region=region)
@@ -238,40 +238,40 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
     if rule_rank_h_flag == '48h':
 
         # ##### 去重小程序相对48h更新结果,并另存为redis中
-        h_48h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H}{app_type}:{data_key}:{by_48h_rule_key}:" \
+        h_48h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H}{data_key}:{by_48h_rule_key}:" \
                          f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
         h_48h_dup_key_name = \
-            f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H}{region}:{app_type}:{data_key}:{rule_key}:" \
+            f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H}{region}:{data_key}:{rule_key}:" \
             f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
         h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_48h_key_name,
                                dup_key_name=h_48h_dup_key_name, region=region)
 
         # ##### 去重小程序相对48h 筛选后剩余数据 更新结果,并另存为redis中
         if by_48h_rule_key == 'rule1':
-            other_h_48h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H_OTHER}{app_type}:{data_key}:" \
+            other_h_48h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H_OTHER}{data_key}:" \
                                    f"{by_48h_rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
             other_h_48h_dup_key_name = \
-                f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H}{region}:{app_type}:{data_key}:{rule_key}:" \
+                f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H}{region}:{data_key}:{rule_key}:" \
                 f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
             h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_48h_key_name,
                                    dup_key_name=other_h_48h_dup_key_name, region=region)
 
     else:
         # ##### 去重小程序相对24h更新结果,并另存为redis中
-        h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{app_type}:{data_key}:{by_24h_rule_key}:" \
+        h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{data_key}:{by_24h_rule_key}:" \
                          f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
         h_24h_dup_key_name = \
-            f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H}{region}:{app_type}:{data_key}:{rule_key}:" \
+            f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
             f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
         h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_24h_key_name,
                                dup_key_name=h_24h_dup_key_name, region=region)
 
         # ##### 去重小程序相对24h 筛选后剩余数据 更新结果,并另存为redis中
         if by_24h_rule_key == 'rule3':
-            other_h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{app_type}:{data_key}:" \
+            other_h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{data_key}:" \
                                    f"{by_24h_rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
             other_h_24h_dup_key_name = \
-                f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H}{region}:{app_type}:{data_key}:{rule_key}:" \
+                f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
                 f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
             h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_24h_key_name,
                                    dup_key_name=other_h_24h_dup_key_name, region=region)
@@ -279,7 +279,7 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
     # ##### 去重小程序模型更新结果,并另存为redis中
     model_key_name = get_rov_redis_key(now_date=now_date)
     model_data_dup_key_name = \
-        f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}:{app_type}:{data_key}:{rule_key}:" \
+        f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}:{data_key}:{rule_key}:" \
         f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
     h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=model_key_name,
                            dup_key_name=model_data_dup_key_name, region=region)
@@ -458,14 +458,14 @@ def merge_df(df_left, df_right):
     return df_merged[feature_list]
 
 
-def process_with_region(region, df_merged, app_type, data_key, rule_key, rule_param, now_date, now_h, rule_rank_h_flag):
+def process_with_region(region, df_merged, data_key, rule_key, rule_param, now_date, now_h, rule_rank_h_flag):
     log_.info(f"region = {region} start...")
     # 计算score
     region_df = df_merged[df_merged['code'] == region]
     log_.info(f'region = {region}, region_df count = {len(region_df)}')
     score_df = cal_score(df=region_df, param=rule_param)
     video_rank(df=score_df, now_date=now_date, now_h=now_h, rule_key=rule_key, param=rule_param,
-               region=region, app_type=app_type, data_key=data_key, rule_rank_h_flag=rule_rank_h_flag)
+               region=region, data_key=data_key, rule_rank_h_flag=rule_rank_h_flag)
     log_.info(f"region = {region} end!")
 
 
@@ -514,23 +514,54 @@ def process_with_app_type(app_type, params, region_code_list, feature_df, now_da
     # gevent.joinall(task_list)
 
 
+def process_with_param(param, data_params_item, rule_params_item, region_code_list, feature_df, now_date, now_h, rule_rank_h_flag):
+    """Rank videos for one (data, rule) configuration across every region.
+
+    The rows of ``feature_df`` are filtered once per apptype listed under the
+    configuration's data key, the per-apptype frames are folded together with
+    ``merge_df``, and one greenlet per region then runs ``process_with_region``
+    on the merged frame.
+
+    :param param: dict holding the 'data' key and the 'rule' key of this configuration
+    :param data_params_item: mapping from data key to the apptype values whose rows are used
+    :param rule_params_item: mapping from rule key to the rule parameters
+    :param region_code_list: region codes; one greenlet is spawned per code
+    :param feature_df: feature DataFrame with an 'apptype' column
+    :param now_date: passed through unchanged to process_with_region
+    :param now_h: passed through unchanged to process_with_region
+    :param rule_rank_h_flag: passed through unchanged to process_with_region
+    :return: None
+    """
+    log_.info(f"param = {param} start...")
+
+    data_key = param.get('data')
+    data_param = data_params_item.get(data_key)
+    log_.info(f"data_key = {data_key}, data_param = {data_param}")
+    if not data_param:
+        # An unknown data key (None) or an empty apptype list would otherwise end in a
+        # TypeError from the iteration / reduce() below. rank_by_h submits this function
+        # with apply_async and never collects the result, so that error would not be
+        # reported anywhere; log the reason and skip this configuration instead.
+        log_.info(f"param = {param} skipped: data_param is empty for data_key = {data_key}")
+        return
+    df_list = [feature_df[feature_df['apptype'] == apptype] for apptype in data_param]
+    df_merged = reduce(merge_df, df_list)
+
+    rule_key = param.get('rule')
+    rule_param = rule_params_item.get(rule_key)
+    log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
+    task_list = [
+        gevent.spawn(process_with_region,
+                     region, df_merged, data_key, rule_key, rule_param, now_date, now_h, rule_rank_h_flag)
+        for region in region_code_list
+    ]
+    # Block until every region greenlet has finished before reporting completion.
+    gevent.joinall(task_list)
+
+    log_.info(f"param = {param} end!")
+
+
 def rank_by_h(project, table, now_date, now_h, rule_params, region_code_list, rule_rank_h_flag):
     # 获取特征数据
     feature_df = get_feature_data(project=project, table=table, now_date=now_date)
     feature_df['apptype'] = feature_df['apptype'].astype(int)
-    # t = [
-    #     gevent.spawn(process_with_app_type, app_type, params, region_code_list, feature_df, now_date, now_h)
-    #     for app_type, params in rule_params.items()
-    # ]
-    # gevent.joinall(t)
-
-    pool = multiprocessing.Pool(processes=len(config_.APP_TYPE))
-    for app_type, params in rule_params.items():
-        pool.apply_async(func=process_with_app_type,
-                         args=(app_type, params, region_code_list, feature_df, now_date, now_h, rule_rank_h_flag))
+    data_params_item = rule_params.get('data_params')
+    rule_params_item = rule_params.get('rule_params')
+    params_list = rule_params.get('params_list')
+    pool = multiprocessing.Pool(processes=len(params_list))
+    for param in params_list:
+        pool.apply_async(
+            func=process_with_param,
+            args=(param, data_params_item, rule_params_item, region_code_list, feature_df, now_date, now_h, rule_rank_h_flag)
+        )
     pool.close()
     pool.join()
 
+
+
+
+    # pool = multiprocessing.Pool(processes=len(config_.APP_TYPE))
+    # for app_type, params in rule_params.items():
+    #     pool.apply_async(func=process_with_app_type,
+    #                      args=(app_type, params, region_code_list, feature_df, now_date, now_h, rule_rank_h_flag))
+    # pool.close()
+    # pool.join()
+
     """
     for app_type, params in rule_params.items():
         log_.info(f"app_type = {app_type} start...")
@@ -610,71 +641,36 @@ def h_rank_bottom(now_date, now_h, rule_params, region_code_list, rule_rank_h_fl
 
     # 以上一小时的地域分组数据作为当前小时的数据
     key_prefix = config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H
-    for app_type, params in rule_params.items():
-        log_.info(f"app_type = {app_type}")
-        rule_params_item = params.get('rule_params')
-        for param in params.get('params_list'):
-            data_key = param.get('data')
-            rule_key = param.get('rule')
-            rule_param = rule_params_item.get(rule_key)
-            log_.info(f"data_key = {data_key}, rule_key = {rule_key}, rule_param = {rule_param}")
-            region_24h_rule_key = rule_param.get('region_24h_rule_key', 'rule1')
-            by_24h_rule_key = rule_param.get('24h_rule_key', None)
-            by_48h_rule_key = rule_param.get('48h_rule_key', None)
-            for region in region_code_list:
-                log_.info(f"region = {region}")
-                key_name = f"{key_prefix}{region}:{app_type}:{data_key}:{rule_key}:{redis_dt}:{redis_h}"
-                initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
-                if initial_data is None:
-                    initial_data = []
-                final_data = dict()
-                h_video_ids = []
-                for video_id, score in initial_data:
-                    final_data[video_id] = score
-                    h_video_ids.append(int(video_id))
-                # 存入对应的redis
-                final_key_name = \
-                    f"{key_prefix}{region}:{app_type}:{data_key}:{rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
-                if len(final_data) > 0:
-                    redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=23 * 3600)
-                # 清空线上过滤应用列表
-                # redis_helper.del_keys(
-                #     key_name=f"{config_.REGION_H_VIDEO_FILER}{region}.{app_type}.{data_key}.{rule_key}")
-
-                # 与其他召回视频池去重,存入对应的redis
-                dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key,
-                             region_24h_rule_key=region_24h_rule_key, region=region,
-                             app_type=app_type, data_key=data_key, by_24h_rule_key=by_24h_rule_key,
-                             by_48h_rule_key=by_48h_rule_key, rule_rank_h_flag=rule_rank_h_flag)
-
-
-        # for data_key, data_param in params['data_params'].items():
-        #     log_.info(f"data_key = {data_key}, data_param = {data_param}")
-        #     for rule_key, rule_param in params['rule_params'].items():
-        #         log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
-        #         region_24h_rule_key = rule_param.get('region_24h_rule_key', 'rule1')
-        #         for region in region_code_list:
-        #             log_.info(f"region = {region}")
-        #             key_name = f"{key_prefix}{region}.{app_type}.{data_key}.{rule_key}.{redis_dt}.{redis_h}"
-        #             initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
-        #             if initial_data is None:
-        #                 initial_data = []
-        #             final_data = dict()
-        #             h_video_ids = []
-        #             for video_id, score in initial_data:
-        #                 final_data[video_id] = score
-        #                 h_video_ids.append(int(video_id))
-        #             # 存入对应的redis
-        #             final_key_name = \
-        #                 f"{key_prefix}{region}.{app_type}.{data_key}.{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
-        #             if len(final_data) > 0:
-        #                 redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=23 * 3600)
-        #             # 清空线上过滤应用列表
-        #             redis_helper.del_keys(key_name=f"{config_.REGION_H_VIDEO_FILER}{region}.{app_type}.{data_key}.{rule_key}")
-        #             # 与其他召回视频池去重,存入对应的redis
-        #             dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key,
-        #                          region_24h_rule_key=region_24h_rule_key, region=region,
-        #                          app_type=app_type, data_key=data_key)
+    rule_params_item = rule_params.get('rule_params')
+    for param in rule_params.get('params_list'):
+        data_key = param.get('data')
+        rule_key = param.get('rule')
+        rule_param = rule_params_item.get(rule_key)
+        log_.info(f"data_key = {data_key}, rule_key = {rule_key}, rule_param = {rule_param}")
+        region_24h_rule_key = rule_param.get('region_24h_rule_key', 'rule1')
+        by_24h_rule_key = rule_param.get('24h_rule_key', None)
+        by_48h_rule_key = rule_param.get('48h_rule_key', None)
+        for region in region_code_list:
+            log_.info(f"region = {region}")
+            key_name = f"{key_prefix}{region}:{data_key}:{rule_key}:{redis_dt}:{redis_h}"
+            initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
+            if initial_data is None:
+                initial_data = []
+            final_data = dict()
+            h_video_ids = []
+            for video_id, score in initial_data:
+                final_data[video_id] = score
+                h_video_ids.append(int(video_id))
+            # 存入对应的redis
+            final_key_name = \
+                f"{key_prefix}{region}:{data_key}:{rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
+            if len(final_data) > 0:
+                redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=23 * 3600)
+            # 与其他召回视频池去重,存入对应的redis
+            dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key,
+                         region_24h_rule_key=region_24h_rule_key, region=region,
+                         data_key=data_key, by_24h_rule_key=by_24h_rule_key,
+                         by_48h_rule_key=by_48h_rule_key, rule_rank_h_flag=rule_rank_h_flag)
 
 
 def h_timer_check():

+ 64 - 8
region_rule_rank_h_by24h.py

@@ -129,7 +129,7 @@ def cal_score(df, param):
     return df
 
 
-def video_rank(df, now_date, now_h, rule_key, param, region, app_type, data_key):
+def video_rank(df, now_date, now_h, rule_key, param, region, data_key):
     """
     获取符合进入召回源条件的视频
     :param df:
@@ -168,7 +168,7 @@ def video_rank(df, now_date, now_h, rule_key, param, region, app_type, data_key)
         h_video_ids.append(int(video_id))
 
     day_recall_key_name = \
-        f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_24H}{region}:{app_type}:{data_key}:{rule_key}:" \
+        f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_24H}{region}:{data_key}:{rule_key}:" \
         f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
     if len(day_recall_result) > 0:
         redis_helper.add_data_with_zset(key_name=day_recall_key_name, data=day_recall_result, expire_time=2 * 3600)
@@ -197,15 +197,14 @@ def merge_df(df_left, df_right):
     return df_merged[feature_list]
 
 
-def process_with_region(region, df_merged, app_type, data_key, rule_key, rule_param, now_date, now_h):
+def process_with_region(region, df_merged, data_key, rule_key, rule_param, now_date, now_h):
     log_.info(f"region = {region} start...")
     # 计算score
     region_df = df_merged[df_merged['code'] == region]
     log_.info(f'region = {region}, region_df count = {len(region_df)}')
     score_df = cal_score(df=region_df, param=rule_param)
-    video_rank(df=score_df, now_date=now_date, now_h=now_h,
-               rule_key=rule_key, param=rule_param, region=region,
-               app_type=app_type, data_key=data_key)
+    video_rank(df=score_df, now_date=now_date, now_h=now_h, region=region,
+               rule_key=rule_key, param=rule_param, data_key=data_key)
     log_.info(f"region = {region} end!")
 
 
@@ -232,17 +231,52 @@ def process_with_app_type(app_type, params, region_code_list, feature_df, now_da
     log_.info(f"app_type = {app_type} end!")
 
 
+def process_with_param(param, data_params_item, rule_params_item, region_code_list, feature_df, now_date, now_h):
+    """Rank videos for one (data, rule) configuration across every region.
+
+    The rows of ``feature_df`` are filtered once per apptype listed under the
+    configuration's data key, the per-apptype frames are folded together with
+    ``merge_df``, and one greenlet per region then runs ``process_with_region``
+    on the merged frame.
+
+    :param param: dict holding the 'data' key and the 'rule' key of this configuration
+    :param data_params_item: mapping from data key to the apptype values whose rows are used
+    :param rule_params_item: mapping from rule key to the rule parameters
+    :param region_code_list: region codes; one greenlet is spawned per code
+    :param feature_df: feature DataFrame with an 'apptype' column
+    :param now_date: passed through unchanged to process_with_region
+    :param now_h: passed through unchanged to process_with_region
+    :return: None
+    """
+    log_.info(f"param = {param} start...")
+
+    data_key = param.get('data')
+    data_param = data_params_item.get(data_key)
+    log_.info(f"data_key = {data_key}, data_param = {data_param}")
+    if not data_param:
+        # An unknown data key (None) or an empty apptype list would otherwise end in a
+        # TypeError from the iteration / reduce() below. rank_by_24h submits this function
+        # with apply_async and never collects the result, so that error would not be
+        # reported anywhere; log the reason and skip this configuration instead.
+        log_.info(f"param = {param} skipped: data_param is empty for data_key = {data_key}")
+        return
+    df_list = [feature_df[feature_df['apptype'] == apptype] for apptype in data_param]
+    df_merged = reduce(merge_df, df_list)
+
+    rule_key = param.get('rule')
+    rule_param = rule_params_item.get(rule_key)
+    log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
+    task_list = [
+        gevent.spawn(process_with_region, region, df_merged, data_key, rule_key, rule_param, now_date, now_h)
+        for region in region_code_list
+    ]
+    # Block until every region greenlet has finished before reporting completion.
+    gevent.joinall(task_list)
+
+    log_.info(f"param = {param} end!")
+
+
 def rank_by_24h(project, table, now_date, now_h, rule_params, region_code_list):
     # 获取特征数据
     feature_df = get_feature_data(project=project, table=table, now_date=now_date)
     feature_df['apptype'] = feature_df['apptype'].astype(int)
     # rank
+    data_params_item = rule_params.get('data_params')
+    rule_params_item = rule_params.get('rule_params')
+    params_list = rule_params.get('params_list')
+    pool = multiprocessing.Pool(processes=len(params_list))
+    for param in params_list:
+        pool.apply_async(
+            func=process_with_param,
+            args=(param, data_params_item, rule_params_item, region_code_list, feature_df, now_date, now_h)
+        )
+    pool.close()
+    pool.join()
+
+    """
     pool = multiprocessing.Pool(processes=len(config_.APP_TYPE))
     for app_type, params in rule_params.items():
         pool.apply_async(func=process_with_app_type,
                          args=(app_type, params, region_code_list, feature_df, now_date, now_h))
     pool.close()
     pool.join()
+    """
 
     # for app_type, params in rule_params.items():
     #     log_.info(f"app_type = {app_type}")
@@ -331,6 +365,28 @@ def h_rank_bottom(now_date, now_h, rule_params, region_code_list):
 
     # 以上一小时的地域分组数据作为当前小时的数据
     key_prefix = config_.RECALL_KEY_NAME_PREFIX_REGION_BY_24H
+    for param in rule_params.get('params_list'):
+        data_key = param.get('data')
+        rule_key = param.get('rule')
+        log_.info(f"data_key = {data_key}, rule_key = {rule_key}")
+        for region in region_code_list:
+            log_.info(f"region = {region}")
+            key_name = f"{key_prefix}{region}:{data_key}:{rule_key}:{redis_dt}:{redis_h}"
+            initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
+            if initial_data is None:
+                initial_data = []
+            final_data = dict()
+            h_video_ids = []
+            for video_id, score in initial_data:
+                final_data[video_id] = score
+                h_video_ids.append(int(video_id))
+            # 存入对应的redis
+            final_key_name = \
+                f"{key_prefix}{region}:{data_key}:{rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
+            if len(final_data) > 0:
+                redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=2 * 3600)
+
+    """
     for app_type, params in rule_params.items():
         log_.info(f"app_type = {app_type}")
         for param in params.get('params_list'):
@@ -358,6 +414,7 @@ def h_rank_bottom(now_date, now_h, rule_params, region_code_list):
 
                 # 与其他召回视频池去重,存入对应的redis
                 # dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key, region=region)
+    """
 
 
 def h_timer_check():
@@ -379,8 +436,7 @@ def h_timer_check():
         log_.info(f"region_24h_data end!")
     elif now_min > 50:
         log_.info('24h_recall data is None, use bottom data!')
-        for key, _ in rule_params.items():
-            h_rank_bottom(now_date=now_date, now_h=now_h, rule_params=rule_params, region_code_list=region_code_list)
+        h_rank_bottom(now_date=now_date, now_h=now_h, rule_params=rule_params, region_code_list=region_code_list)
         log_.info(f"region_24h_data end!")
     else:
         # 数据没准备好,1分钟后重新检查

+ 54 - 18
rule_rank_h_by_24h.py

@@ -125,7 +125,7 @@ def cal_score2(df, param):
     return df
 
 
-def video_rank_h(df, now_date, now_h, rule_key, param, app_type, data_key):
+def video_rank_h(df, now_date, now_h, rule_key, param, data_key):
     """
     获取符合进入召回源条件的视频,与每日更新的rov模型结果视频列表进行合并
     :param df:
@@ -133,18 +133,11 @@ def video_rank_h(df, now_date, now_h, rule_key, param, app_type, data_key):
     :param now_h:
     :param rule_key: 天级规则数据进入条件
     :param param: 天级规则数据进入条件参数
-    :param app_type:
     :param data_key: 使用数据标识
     :return:
     """
     redis_helper = RedisHelper()
-    # 获取rov模型结果
-    # key_name = get_rov_redis_key(now_date=now_date)
-    # initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
-    # if initial_data is None:
-    #     initial_data = []
-    # log_.info(f'initial data count = {len(initial_data)}')
-    log_.info(f"app_type = {app_type}, videos_count = {len(df)}")
+    log_.info(f"videos_count = {len(df)}")
 
     # videoid重复时,保留分值高
     df = df.sort_values(by=['score'], ascending=False)
@@ -159,11 +152,6 @@ def video_rank_h(df, now_date, now_h, rule_key, param, app_type, data_key):
         day_recall_df = df
     platform_return_rate = param.get('platform_return_rate', 0)
     day_recall_df = day_recall_df[day_recall_df['platform_return_rate'] > platform_return_rate]
-
-    # videoid重复时,保留分值高
-    # day_recall_df = day_recall_df.sort_values(by=['score'], ascending=False)
-    # day_recall_df = day_recall_df.drop_duplicates(subset=['videoid'], keep='first')
-    # day_recall_df['videoid'] = day_recall_df['videoid'].astype(int)
     day_recall_videos = day_recall_df['videoid'].to_list()
     log_.info(f'h_by24h_recall videos count = {len(day_recall_videos)}')
 
@@ -179,10 +167,13 @@ def video_rank_h(df, now_date, now_h, rule_key, param, app_type, data_key):
         score = day_recall_df[day_recall_df['videoid'] == video_id]['score']
         day_recall_result[int(video_id)] = float(score)
         day_video_ids.append(int(video_id))
+    # h_24h_recall_key_name = \
+    #     f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{app_type}:{data_key}:{rule_key}:{now_dt}:{now_h}"
     h_24h_recall_key_name = \
-        f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{app_type}:{data_key}:{rule_key}:{now_dt}:{now_h}"
+        f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{data_key}:{rule_key}:{now_dt}:{now_h}"
+
     if len(day_recall_result) > 0:
-        log_.info(f"count = {len(day_recall_result)}")
+        log_.info(f"count = {len(day_recall_result)}, key = {h_24h_recall_key_name}")
         redis_helper.add_data_with_zset(key_name=h_24h_recall_key_name, data=day_recall_result, expire_time=2 * 3600)
         # 清空线上过滤应用列表
         # redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER_24H}{app_type}.{data_key}.{rule_key}")
@@ -202,8 +193,10 @@ def video_rank_h(df, now_date, now_h, rule_key, param, app_type, data_key):
         for video_id in other_videos:
             score = df[df['videoid'] == video_id]['score']
             other_24h_recall_result[int(video_id)] = float(score)
+        # other_h_24h_recall_key_name = \
+        #     f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{app_type}:{data_key}:{rule_key}:{now_dt}:{now_h}"
         other_h_24h_recall_key_name = \
-            f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{app_type}:{data_key}:{rule_key}:{now_dt}:{now_h}"
+            f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{data_key}:{rule_key}:{now_dt}:{now_h}"
         if len(other_24h_recall_result) > 0:
             log_.info(f"count = {len(other_24h_recall_result)}")
             redis_helper.add_data_with_zset(key_name=other_h_24h_recall_key_name, data=other_24h_recall_result,
@@ -244,6 +237,28 @@ def rank_by_h(now_date, now_h, rule_params, project, table):
     feature_df = get_feature_data(now_date=now_date, now_h=now_h, project=project, table=table)
     feature_df['apptype'] = feature_df['apptype'].astype(int)
     # rank
+    data_params_item = rule_params.get('data_params')
+    rule_params_item = rule_params.get('rule_params')
+    for param in rule_params.get('params_list'):
+        data_key = param.get('data')
+        data_param = data_params_item.get(data_key)
+        log_.info(f"data_key = {data_key}, data_param = {data_param}")
+        df_list = [feature_df[feature_df['apptype'] == apptype] for apptype in data_param]
+        df_merged = reduce(merge_df, df_list)
+
+        rule_key = param.get('rule')
+        rule_param = rule_params_item.get(rule_key)
+        log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
+        # 计算score
+        cal_score_func = rule_param.get('cal_score_func', 1)
+        if cal_score_func == 2:
+            score_df = cal_score2(df=df_merged, param=rule_param)
+        else:
+            score_df = cal_score1(df=df_merged)
+        video_rank_h(df=score_df, now_date=now_date, now_h=now_h,
+                     rule_key=rule_key, param=rule_param, data_key=data_key)
+
+    """
     for app_type, params in rule_params.items():
         log_.info(f"app_type = {app_type}")
         data_params_item = params.get('data_params')
@@ -266,7 +281,7 @@ def rank_by_h(now_date, now_h, rule_params, project, table):
                 score_df = cal_score1(df=df_merged)
             video_rank_h(df=score_df, now_date=now_date, now_h=now_h, rule_key=rule_key, param=rule_param,
                          app_type=app_type, data_key=data_key)
-
+    """
     #     # to-csv
     #     score_filename = f"score_by24h_{key}_{datetime.strftime(now_date, '%Y%m%d%H')}.csv"
     #     score_df.to_csv(f'./data/{score_filename}')
@@ -288,6 +303,26 @@ def h_rank_bottom(now_date, now_h, rule_params):
         redis_dt = datetime.strftime(now_date, '%Y%m%d')
         redis_h = now_h - 1
     key_prefix_list = [config_.RECALL_KEY_NAME_PREFIX_BY_24H, config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER]
+
+    for param in rule_params.get('params_list'):
+        data_key = param.get('data')
+        rule_key = param.get('rule')
+        log_.info(f"data_key = {data_key}, rule_key = {rule_key}")
+        for key_prefix in key_prefix_list:
+            key_name = f"{key_prefix}{data_key}:{rule_key}:{redis_dt}:{redis_h}"
+            initial_data = redis_helper.get_all_data_from_zset(key_name=key_name, with_scores=True)
+            if initial_data is None:
+                initial_data = []
+            final_data = dict()
+            for video_id, score in initial_data:
+                final_data[video_id] = score
+            # 存入对应的redis
+            final_key_name = \
+                f"{key_prefix}{data_key}:{rule_key}:{datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
+            if len(final_data) > 0:
+                redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=2 * 3600)
+
+    """
     for app_type, params in rule_params.items():
         log_.info(f"app_type = {app_type}")
         for param in params.get('params_list'):
@@ -309,6 +344,7 @@ def h_rank_bottom(now_date, now_h, rule_params):
                     redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=2 * 3600)
                 # 清空线上过滤应用列表
                 # redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER_24H}{app_type}.{data_key}.{rule_key}")
+    """
 
 
 def h_timer_check():

+ 40 - 31
videos_filter.py

@@ -1,3 +1,4 @@
+import multiprocessing
 import gevent
 import os
 import time
@@ -568,8 +569,8 @@ def filter_old_videos():
     log_.info("old videos filter end!")
 
 
-def filter_process_with_region(app_type, data_key, rule_key, region, now_date, now_h):
-    log_.info(f"app_type = {app_type}, data_key = {data_key}, rule_key = {rule_key}, region = {region}")
+def filter_process_with_region(data_key, rule_key, region, now_date, now_h):
+    log_.info(f"data_key = {data_key}, rule_key = {rule_key}, region = {region}")
     # 需过滤视频列表
     key_prefix_list = [
         config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H,
@@ -584,7 +585,7 @@ def filter_process_with_region(app_type, data_key, rule_key, region, now_date, n
     ]
     for i, key_prefix in enumerate(key_prefix_list):
         # 拼接key
-        key_name = f"{key_prefix}{region}:{app_type}:{data_key}:{rule_key}:{now_date}:{now_h}"
+        key_name = f"{key_prefix}{region}:{data_key}:{rule_key}:{now_date}:{now_h}"
         # 获取视频
         data = redis_helper.get_all_data_from_zset(key_name=key_name)
         if data is None:
@@ -621,45 +622,53 @@ def filter_process_with_region(app_type, data_key, rule_key, region, now_date, n
         #                                             f"{region}.{app_type}.{data_key}.{rule_key}",
         #                                    values=filter_videos, expire_time=2 * 3600)
 
-    log_.info(f"app_type = {app_type}, data_key = {data_key}, rule_key = {rule_key}, region = {region} "
-              f"videos filter end!")
+    log_.info(f"data_key = {data_key}, rule_key = {rule_key}, region = {region} videos filter end!")
+
+
+def filter_process_with_param(param, region_code_list, now_date, now_h):
+    """Run the region video filter for one (data, rule) configuration.
+
+    :param param: dict holding the 'data' key and the 'rule' key of this configuration
+    :param region_code_list: region codes; one greenlet is spawned per code
+    :param now_date: passed through unchanged to filter_process_with_region
+    :param now_h: passed through unchanged to filter_process_with_region
+    :return: None
+    """
+    data_key, rule_key = param.get('data'), param.get('rule')
+    log_.info(f"param = {param} videos filter start... ")
+    greenlets = []
+    for region in region_code_list:
+        greenlets.append(
+            gevent.spawn(filter_process_with_region, data_key, rule_key, region, now_date, now_h)
+        )
+    # Wait for every per-region filter greenlet before logging the end marker.
+    gevent.joinall(greenlets)
+    log_.info(f"param = {param} videos filter end!")
 
 
 def filter_region_videos(rule_params):
     """过滤地域分组规则视频"""
     region_code_list = [code for region, code in region_code.items()]
-    # rule_params = config_.RULE_PARAMS_REGION_APP_TYPE
     log_.info("region_h videos filter start ...")
-    redis_helper = RedisHelper()
     # 获取当前日期
     now_date = date.today().strftime('%Y%m%d')
     # 获取当前所在小时
     now_h = datetime.now().hour
     log_.info(f'now_date = {now_date}, now_h = {now_h}.')
-    task_list = []
-    for app_type, params in rule_params.items():
-        log_.info(f"app_type = {app_type}")
-        for param in params.get('params_list'):
-            data_key = param.get('data')
-            rule_key = param.get('rule')
-            log_.info(f"data_key = {data_key}, rule_key = {rule_key}")
-            task_list.extend(
-                [
-                    gevent.spawn(filter_process_with_region, app_type, data_key, rule_key, region, now_date, now_h)
-                    for region in region_code_list
-                ]
-            )
-        # for data_key, data_param in params['data_params'].items():
-        #     log_.info(f"data_key = {data_key}, data_param = {data_param}")
-        #     for rule_key, rule_param in params['rule_params'].items():
-        #         log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
-        #         task_list.extend(
-        #             [
-        #                 gevent.spawn(filter_process_with_region, app_type, data_key, rule_key, region, now_date, now_h)
-        #                 for region in region_code_list
-        #             ]
-        #         )
-    gevent.joinall(task_list)
+    params_list = rule_params.get('params_list')
+    pool = multiprocessing.Pool(processes=len(params_list))
+    for param in params_list:
+        pool.apply_async(
+            func=filter_process_with_param,
+            args=(param, region_code_list, now_date, now_h)
+        )
+    pool.close()
+    pool.join()
+
+
+    # task_list = []
+    # for param in rule_params.get('params_list'):
+    #     data_key = param.get('data')
+    #     rule_key = param.get('rule')
+    #     log_.info(f"data_key = {data_key}, rule_key = {rule_key}")
+    #     task_list.extend(
+    #         [
+    #             gevent.spawn(filter_process_with_region, data_key, rule_key, region, now_date, now_h)
+    #             for region in region_code_list
+    #         ]
+    #     )
+    # gevent.joinall(task_list)
     log_.info("region_h videos filter end!")