瀏覽代碼

merge app-recommend-update-2022090213

liqian 2 年之前
父節點
當前提交
59fccee7e1
共有 6 個文件被更改,包括 37 次插入和 15 次删除
  1. + 8 - 3
      config.py
  2. + 6 - 2
      region_rule_rank_h.py
  3. + 6 - 2
      region_rule_rank_h_by24h.py
  4. + 9 - 3
      rule_rank_h_by_24h.py
  5. + 4 - 4
      utils.py
  6. + 4 - 1
      videos_filter.py

+ 8 - 3
config.py

@@ -143,8 +143,7 @@ class BaseConfig(object):
         'data4': {APP_TYPE['VLOG']: 0.5, APP_TYPE['SHORT_VIDEO']: 0.5},  # [vlog, 票圈短视频]
         # 'data5': [APP_TYPE['VLOG'], APP_TYPE['ZUI_JING_QI']],  # [vlog, 最惊奇]
         'data6': {APP_TYPE['VLOG']: 0.25, APP_TYPE['LOVE_LIVE']: 0.25, APP_TYPE['LONG_VIDEO']: 0.25, APP_TYPE['SHORT_VIDEO']: 0.25},
-        # 'data7': [APP_TYPE['VLOG'], APP_TYPE['LOVE_LIVE'], APP_TYPE['LONG_VIDEO'], APP_TYPE['SHORT_VIDEO'],
-        #           APP_TYPE['APP']],
+        'data7': {APP_TYPE['VLOG']: 0.5, APP_TYPE['APP']: 0.5},  # [vlog, 票圈视频APP]
         'data8': {APP_TYPE['VLOG']: 0.7, APP_TYPE['LONG_VIDEO']: 0.3},  # [vlog, 内容精选]
         'data9': {APP_TYPE['VLOG']: 0.3, APP_TYPE['LONG_VIDEO']: 0.7},  # [vlog, 内容精选]
         'data10': {APP_TYPE['VLOG']: 0.7, APP_TYPE['LOVE_LIVE']: 0.3},  # [vlog, 票圈视频]
@@ -211,6 +210,7 @@ class BaseConfig(object):
             {'data': 'data3', 'rule': 'rule4'},
             {'data': 'data4', 'rule': 'rule4'},
             {'data': 'data6', 'rule': 'rule4'},
+            {'data': 'data7', 'rule': 'rule4'},
             {'data': 'data1', 'rule': 'rule5'},
             {'data': 'data1', 'rule': 'rule6'},
             {'data': 'data8', 'rule': 'rule4'},
@@ -219,7 +219,6 @@ class BaseConfig(object):
             {'data': 'data11', 'rule': 'rule4'},
             {'data': 'data12', 'rule': 'rule4'},
             {'data': 'data13', 'rule': 'rule4'},
-
         ]
     }
 
@@ -236,6 +235,8 @@ class BaseConfig(object):
                       'platform_return_rate': 0.001},
             'rule4': {'view_type': 'video-show', 'return_count': 21, 'score_rule': 0,
                       'platform_return_rate': 0.001, 'merge_func': 2},
+            'rule5': {'view_type': 'preview', 'return_count': 21, 'score_rule': 0,
+                      'platform_return_rate': 0.001, 'merge_func': 2},
             # 无回流人群
             'rule6': {'view_type': 'video-show', 'return_count': 21, 'score_rule': 0,
                       'platform_return_rate': 0.001, 'click_score_rate': 0.7},
@@ -251,6 +252,7 @@ class BaseConfig(object):
             {'data': 'data3', 'rule': 'rule4'},
             {'data': 'data4', 'rule': 'rule4'},
             {'data': 'data6', 'rule': 'rule4'},
+            {'data': 'data7', 'rule': 'rule5'},
             {'data': 'data1', 'rule': 'rule6'},
             {'data': 'data1', 'rule': 'rule7'},
             {'data': 'data8', 'rule': 'rule4'},
@@ -278,6 +280,8 @@ class BaseConfig(object):
             #           'region_24h_rule_key': 'rule3', '24h_rule_key': 'rule2'},
             'rule7': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
                       'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2},
+            'rule8': {'view_type': 'preview', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule5', '24h_rule_key': 'rule4', 'merge_func': 2},
             'rule9': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3', '30day_rule_key': 'rule1'},
             # 无回流人群
@@ -295,6 +299,7 @@ class BaseConfig(object):
             {'data': 'data3', 'rule': 'rule7'},
             {'data': 'data4', 'rule': 'rule7'},
             {'data': 'data6', 'rule': 'rule7'},
+            {'data': 'data7', 'rule': 'rule8'},
             {'data': 'data1', 'rule': 'rule9'},
             {'data': 'data1', 'rule': 'rule10'},
             {'data': 'data1', 'rule': 'rule11'},

+ 6 - 2
region_rule_rank_h.py

@@ -15,7 +15,8 @@ import math
 from functools import reduce
 from odps import ODPS
 from threading import Timer, Thread
-from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, check_table_partition_exits
+from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
+    check_table_partition_exits, filter_video_status_app
 from config import set_config
 from log import Log
 from check_video_limit_distribute import update_limit_video_score
@@ -191,7 +192,10 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank
     # log_.info(f'h_recall videos count = {len(h_recall_videos)}')
 
     # 视频状态过滤
-    filtered_videos = filter_video_status(h_recall_videos)
+    if data_key in ['data7', ]:
+        filtered_videos = filter_video_status_app(h_recall_videos)
+    else:
+        filtered_videos = filter_video_status(h_recall_videos)
     # log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
 
     # 屏蔽视频过滤

+ 6 - 2
region_rule_rank_h_by24h.py

@@ -14,7 +14,8 @@ import math
 from functools import reduce
 from odps import ODPS
 from threading import Timer, Thread
-from utils import RedisHelper, get_data_from_odps, filter_video_status, check_table_partition_exits
+from utils import RedisHelper, get_data_from_odps, filter_video_status, check_table_partition_exits, \
+    filter_video_status_app
 from config import set_config
 from log import Log
 
@@ -166,7 +167,10 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key):
     # log_.info(f'day_recall videos count = {len(h_recall_videos)}')
 
     # 视频状态过滤
-    filtered_videos = filter_video_status(h_recall_videos)
+    if data_key in ['data7', ]:
+        filtered_videos = filter_video_status_app(h_recall_videos)
+    else:
+        filtered_videos = filter_video_status(h_recall_videos)
     # log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
 
     # 写入对应的redis

+ 9 - 3
rule_rank_h_by_24h.py

@@ -6,7 +6,7 @@ from threading import Timer
 from datetime import datetime, timedelta
 from get_data import get_data_from_odps
 from db_helper import RedisHelper
-from utils import filter_video_status, check_table_partition_exits
+from utils import filter_video_status, check_table_partition_exits, filter_video_status_app
 from config import set_config
 from log import Log
 
@@ -190,7 +190,10 @@ def video_rank_h(df, now_date, now_h, rule_key, param, data_key):
     log_.info(f'h_by24h_recall videos count = {len(day_recall_videos)}')
 
     # 视频状态过滤
-    filtered_videos = filter_video_status(day_recall_videos)
+    if data_key in ['data7', ]:
+        filtered_videos = filter_video_status_app(day_recall_videos)
+    else:
+        filtered_videos = filter_video_status(day_recall_videos)
     log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
 
     # 写入对应的redis
@@ -217,7 +220,10 @@ def video_rank_h(df, now_date, now_h, rule_key, param, data_key):
     all_videos = df['videoid'].to_list()
     log_.info(f'h_by24h_recall all videos count = {len(all_videos)}')
     # 视频状态过滤
-    all_filtered_videos = filter_video_status(all_videos)
+    if data_key in ['data7', ]:
+        all_filtered_videos = filter_video_status_app(all_videos)
+    else:
+        all_filtered_videos = filter_video_status(all_videos)
     log_.info(f'all_filtered_videos count = {len(all_filtered_videos)}')
     # 与筛选结果去重
     other_videos = [video for video in all_filtered_videos if video not in day_video_ids]

+ 4 - 4
utils.py

@@ -274,8 +274,8 @@ def filter_video_status_app(video_ids):
     mysql_helper = MysqlHelper(mysql_info=config_.FILTER_MYSQL_INFO)
     video_status_sql = "SELECT t1.id AS 'video_id', " \
                        "t1.transcode_status AS 'transcoding_status', " \
-                       "t2.audit_status AS 'audit_status', " \
-                       "t2.video_status AS 'open_status', " \
+                       "t2.app_audit_status AS 'app_audit_status', " \
+                       "t2.original_status AS 'open_status', " \
                        "t2.recommend_status AS 'applet_rec_status', " \
                        "t2.app_recommend_status AS 'app_rec_status', " \
                        "t3.charge AS 'payment_status', " \
@@ -288,7 +288,7 @@ def filter_video_status_app(video_ids):
     if len(video_ids) == 1:
         sql = "SELECT video_id " \
               "FROM ({}) " \
-              "WHERE audit_status = 5 " \
+              "WHERE app_audit_status = 5 " \
               "AND app_rec_status IN (1, -6, 10) " \
               "AND open_status = 1 " \
               "AND payment_status = 0 " \
@@ -302,7 +302,7 @@ def filter_video_status_app(video_ids):
         for i in range(len(video_ids) // 2000 + 1):
             sql = "SELECT video_id " \
                   "FROM ({}) " \
-                  "WHERE audit_status = 5 " \
+                  "WHERE app_audit_status = 5 " \
                   "AND app_rec_status IN (1, -6, 10) " \
                   "AND open_status = 1 " \
                   "AND payment_status = 0 " \

+ 4 - 1
videos_filter.py

@@ -594,7 +594,10 @@ def filter_process_with_region(data_key, rule_key, region, now_date, now_h):
             continue
         # 过滤
         video_ids = [int(video_id) for video_id in data]
-        filtered_result = filter_video_status(video_ids=video_ids)
+        if data_key in ['data7', ]:
+            filtered_result = filter_video_status_app(video_ids=video_ids)
+        else:
+            filtered_result = filter_video_status(video_ids=video_ids)
         # 求差集,获取需要过滤掉的视频,并从redis中移除
         filter_videos = set(video_ids) - set(filtered_result)
         log_.info("video_ids size = {}, filtered size = {}, filter sizer = {}".format(len(video_ids),