liqian vor 2 Jahren
Ursprung
Commit
826f4340e6
3 geänderte Dateien mit 102 neuen und 7 gelöschten Zeilen
  1. 3 3
      config.py
  2. 96 2
      laohaokan_recommend_update.py
  3. 3 2
      religion_videos_update.py

+ 3 - 3
config.py

@@ -574,7 +574,7 @@ class BaseConfig(object):
 
     # 宗教视频更新使用数据
     RELIGION_VIDEOS_PROJECT = 'loghubods'
-    RELIGION_VIDEOS_TABLE = ''
+    RELIGION_VIDEOS_TABLE = 'religion_video'
     # 宗教视频列表更新结果存放 redis key 前缀,完整格式:'religion:videos:item:{date}'
     KEY_NAME_PREFIX_RELIGION_VIDEOS = 'religion:videos:item:'
 
@@ -901,8 +901,8 @@ class ProductionConfig(BaseConfig):
 
 def set_config():
     # 获取环境变量 ROV_OFFLINE_ENV
-    env = os.environ.get('ROV_OFFLINE_ENV')
-    # env = 'dev'
+    # env = os.environ.get('ROV_OFFLINE_ENV')
+    env = 'dev'
     if env is None:
         # log_.error('ENV ERROR: is None!')
         return

+ 96 - 2
laohaokan_recommend_update.py

@@ -6,20 +6,114 @@ from log import Log
 config_, env = set_config()
 log_ = Log()
 initial_param = {'data': 'data1', 'rule': 'rule4'}
+redis_helper = RedisHelper()
 
 
 def get_religion_videos(now_date):
     """获取宗教视频列表"""
-    redis_helper = RedisHelper()
     religion_key_name = f"{config_.KEY_NAME_PREFIX_RELIGION_VIDEOS}{datetime.datetime.strftime(now_date, '%Y%m%d')}"
     if not redis_helper.key_exists(religion_key_name):
         redis_dt = datetime.datetime.strftime((now_date - datetime.timedelta(days=1)), '%Y%m%d')
         religion_key_name = f"{config_.KEY_NAME_PREFIX_RELIGION_VIDEOS}{redis_dt}"
     religion_videos = redis_helper.get_all_data_from_zset(key_name=religion_key_name, desc=True, with_scores=True)
+    if religion_videos is None:
+        return []
     return religion_videos
 
 
-def merge_with_region(now_date, now_h, region):
def merge_process(initial_key_name, new_key_name, now_videos, religion_video_id_list):
    """Interleave religion videos into one recall zset and write the merged list to a new key.

    Reads the zset at `initial_key_name`, drops video ids already placed in
    earlier merges (`now_videos`), inserts one pending religion video after
    every second retained video, re-scores the merged list from 100 downward
    so zset order matches list order, and stores it under `new_key_name`
    with a 23h expiry.

    :param initial_key_name: redis zset key holding the source recall list
    :param new_key_name: redis zset key the merged result is written to
    :param now_videos: list of video ids already placed (cross-key dedup); mutated and returned
    :param religion_video_id_list: religion video ids still waiting to be inserted
    :return: updated (now_videos, religion_video_id_list)
    """
    initial_data = redis_helper.get_all_data_from_zset(initial_key_name)
    if not initial_data:
        return now_videos, religion_video_id_list

    placed = set(now_videos)  # O(1) membership test instead of list scans
    initial_video_ids = [int(video_id) for video_id, _ in initial_data]
    initial_video_ids = [vid for vid in initial_video_ids if vid not in placed]
    # Religion videos already present in this list need no insertion.
    religion_video_id_list = [vid for vid in religion_video_id_list if vid not in initial_video_ids]

    if religion_video_id_list:
        new_video_ids = []
        for i, video_id in enumerate(initial_video_ids):
            new_video_ids.append(video_id)
            now_videos.append(video_id)
            # Insert one religion video after every 2nd original video.
            if i % 2 == 1 and religion_video_id_list:
                inserted = religion_video_id_list.pop(0)
                new_video_ids.append(inserted)
                now_videos.append(inserted)
    else:
        new_video_ids = initial_video_ids
        # NOTE(review): the original skipped this extend, so later key merges
        # would not dedup against these ids — added for consistency with the
        # interleave branch above; confirm intended.
        now_videos.extend(initial_video_ids)

    # FIX: after dedup the merged list can be empty; the original divided by
    # zero here (step = 100 / (len(new_video_ids) * 2)).
    if not new_video_ids:
        return now_videos, religion_video_id_list

    # Score from 100 downward so the zset preserves the merged order.
    step = 100 / (len(new_video_ids) * 2)
    new_result = {int(video_id): 100 - i * step for i, video_id in enumerate(new_video_ids)}
    # Write into the new key; expire after 23h so the next day's run replaces it.
    redis_helper.add_data_with_zset(key_name=new_key_name, data=new_result, expire_time=23 * 3600)
    return now_videos, religion_video_id_list
+
+
def merge_with_region(now_date, now_h, region, religion_video_id_list):
    """Merge the religion video list into all four regional recall zsets for one region/hour.

    For each recall key family (hourly regional, regional 24h, filtered 24h,
    remaining-after-filter 24h) the initial-param key is read, religion
    videos are interleaved via `merge_process`, and the result is written
    under the new-param key. `now_videos` and the pending religion list are
    threaded through the merges so a video is only placed once.

    :param now_date: datetime used to build the date segment of the redis keys
    :param now_h: hour segment of the redis keys
    :param region: region code prefixed into the redis keys
    :param religion_video_id_list: religion video ids to distribute across the lists
    """
    initial_data_key = initial_param.get('data')
    initial_rule_key = initial_param.get('rule')
    # NOTE(review): `new_param` is not defined in this diff hunk — presumably a
    # module-level dict mirroring `initial_param`; confirm it exists, otherwise
    # this raises NameError at runtime.
    new_data_key = new_param.get('data')
    new_rule_key = new_param.get('rule')
    dt_str = datetime.datetime.strftime(now_date, '%Y%m%d')
    now_videos = []
    # The four recall key families to merge, in priority order.
    key_prefixes = [
        config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H,        # hourly regional data
        config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H,  # regional 24h data
        config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H,  # 24h filtered data
        config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H,  # 24h remaining data after filtering
    ]
    for prefix in key_prefixes:
        initial_key_name = f"{prefix}{region}:{initial_data_key}:{initial_rule_key}:{dt_str}:{now_h}"
        new_key_name = f"{prefix}{region}:{new_data_key}:{new_rule_key}:{dt_str}:{now_h}"
        now_videos, religion_video_id_list = merge_process(
            initial_key_name=initial_key_name,
            new_key_name=new_key_name,
            now_videos=now_videos,
            religion_video_id_list=religion_video_id_list,
        )
 
 
 

+ 3 - 2
religion_videos_update.py

@@ -9,7 +9,7 @@ from log import Log
 
 config_, env = set_config()
 log_ = Log()
-features = ['videoid', 'play_count', 'dt']
+features = ['videoid', 'play_count_total', 'gmt_create']
 
 
 def data_check(project, table, now_date):
@@ -48,7 +48,8 @@ def get_religion_videos(now_date, project, table):
         feature_data.append(item)
     feature_df = pd.DataFrame(feature_data)
     # 按照发布时间和播放量进行倒序
-    feature_df = feature_df.sort_values(by=['dt', 'play_count'], ascending=False)
+    feature_df = feature_df.sort_values(by=['gmt_create', 'play_count_total'], ascending=False)
+    print(feature_df)
     video_id_list = feature_df['videoid'].to_list()
     # 按照排序给定分数
     final_result = {}