liqian 2 yıl önce
ebeveyn
işleme
87ef3c33ee
5 değiştirilmiş dosya ile 42 ekleme ve 8 silme
  1. 7 2
      region_rule_rank_day.py
  2. 7 2
      region_rule_rank_h.py
  3. 12 2
      rule_rank_day.py
  4. 6 1
      rule_rank_h.py
  5. 10 1
      rule_rank_h_by_24h.py

+ 7 - 2
region_rule_rank_day.py

@@ -9,7 +9,7 @@ import pandas as pd
 import math
 from odps import ODPS
 from threading import Timer
-from utils import RedisHelper, get_data_from_odps
+from utils import RedisHelper, get_data_from_odps, filter_video_status
 from config import set_config
 from log import Log
 
@@ -147,12 +147,17 @@ def video_rank(df, now_date, rule_key, param, region):
     # videoid重复时,保留分值高
     h_recall_df = h_recall_df.sort_values(by=['score'], ascending=False)
     h_recall_df = h_recall_df.drop_duplicates(subset=['videoid'], keep='first')
+    h_recall_df['videoid'] = h_recall_df['videoid'].astype(int)
     h_recall_videos = h_recall_df['videoid'].to_list()
     log_.info(f'day_recall videos count = {len(h_recall_videos)}')
 
+    # 视频状态过滤
+    filtered_videos = filter_video_status(h_recall_videos)
+    log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
+
     # 写入对应的redis
     day_recall_result = {}
-    for video_id in h_recall_videos:
+    for video_id in filtered_videos:
         score = h_recall_df[h_recall_df['videoid'] == video_id]['score']
         # print(score)
         day_recall_result[int(video_id)] = float(score)

+ 7 - 2
region_rule_rank_h.py

@@ -9,7 +9,7 @@ import pandas as pd
 import math
 from odps import ODPS
 from threading import Timer
-from utils import MysqlHelper, RedisHelper, get_data_from_odps
+from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status
 from config import set_config
 from log import Log
 
@@ -173,13 +173,18 @@ def video_rank(df, now_date, now_h, rule_key, param, region):
     # videoid重复时,保留分值高
     h_recall_df = h_recall_df.sort_values(by=['score'], ascending=False)
     h_recall_df = h_recall_df.drop_duplicates(subset=['videoid'], keep='first')
+    h_recall_df['videoid'] = h_recall_df['videoid'].astype(int)
     h_recall_videos = h_recall_df['videoid'].to_list()
     log_.info(f'h_recall videos count = {len(h_recall_videos)}')
 
+    # 视频状态过滤
+    filtered_videos = filter_video_status(h_recall_videos)
+    log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
+
     # 写入对应的redis
     h_video_ids =[]
     h_recall_result = {}
-    for video_id in h_recall_videos:
+    for video_id in filtered_videos:
         score = h_recall_df[h_recall_df['videoid'] == video_id]['score']
         # print(score)
         h_recall_result[int(video_id)] = float(score)

+ 12 - 2
rule_rank_day.py

@@ -2,7 +2,7 @@ import pandas as pd
 from odps import ODPS
 from datetime import datetime, timedelta
 from threading import Timer
-from utils import get_data_from_odps
+from utils import get_data_from_odps, filter_video_status
 from db_helper import RedisHelper
 from config import set_config
 from log import Log
@@ -114,12 +114,22 @@ def video_rank_day(df, now_date, rule_key, param):
         day_recall_df = df[df['回流人数'] > return_count]
     else:
         day_recall_df = df
+
+    # videoid重复时,保留分值高
+    day_recall_df = day_recall_df.sort_values(by=['score'], ascending=False)
+    day_recall_df = day_recall_df.drop_duplicates(subset=['videoid'], keep='first')
+    day_recall_df['videoid'] = day_recall_df['videoid'].astype(int)
     day_recall_videos = day_recall_df['videoid'].to_list()
     log_.info(f'day_recall videos count = {len(day_recall_videos)}')
+
+    # 视频状态过滤
+    filtered_videos = filter_video_status(day_recall_videos)
+    log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
+
     # 写入对应的redis
     day_video_ids =[]
     day_recall_result = {}
-    for video_id in day_recall_videos:
+    for video_id in filtered_videos:
         score = day_recall_df[day_recall_df['videoid'] == video_id]['score']
         day_recall_result[int(video_id)] = float(score)
         day_video_ids.append(int(video_id))

+ 6 - 1
rule_rank_h.py

@@ -4,6 +4,7 @@ import math
 from odps import ODPS
 from threading import Timer
 from get_data import get_data_from_odps
+from utils import filter_video_status
 from db_helper import RedisHelper
 from config import set_config
 from log import Log
@@ -147,12 +148,16 @@ def video_rank(df, now_date, now_h, rule_key, param):
     return_count = param.get('return_count')
     score_value = param.get('score_rule')
     h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= score_value)]
+    h_recall_df['videoid'] = h_recall_df['videoid'].astype(int)
     h_recall_videos = h_recall_df['videoid'].to_list()
     log_.info(f'h_recall videos count = {len(h_recall_videos)}')
+    # 视频状态过滤
+    filtered_videos = filter_video_status(h_recall_videos)
+    log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
     # 写入对应的redis
     h_video_ids =[]
     h_recall_result = {}
-    for video_id in h_recall_videos:
+    for video_id in filtered_videos:
         score = h_recall_df[h_recall_df['videoid'] == video_id]['score']
         h_recall_result[int(video_id)] = float(score)
         h_video_ids.append(int(video_id))

+ 10 - 1
rule_rank_h_by_24h.py

@@ -5,6 +5,7 @@ from threading import Timer
 from datetime import datetime, timedelta
 from get_data import get_data_from_odps
 from db_helper import RedisHelper
+from utils import filter_video_status
 from config import set_config
 from log import Log
 
@@ -114,14 +115,22 @@ def video_rank_h(df, now_date, now_h, rule_key, param):
         day_recall_df = df[df['回流人数'] > return_count]
     else:
         day_recall_df = df
+    # videoid重复时,保留分值高
+    day_recall_df = day_recall_df.sort_values(by=['score'], ascending=False)
+    day_recall_df = day_recall_df.drop_duplicates(subset=['videoid'], keep='first')
+    day_recall_df['videoid'] = day_recall_df['videoid'].astype(int)
     day_recall_videos = day_recall_df['videoid'].to_list()
     log_.info(f'h_by24h_recall videos count = {len(day_recall_videos)}')
 
+    # 视频状态过滤
+    filtered_videos = filter_video_status(day_recall_videos)
+    log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
+
     # 写入对应的redis
     now_dt = datetime.strftime(now_date, '%Y%m%d')
     day_video_ids = []
     day_recall_result = {}
-    for video_id in day_recall_videos:
+    for video_id in filtered_videos:
         score = day_recall_df[day_recall_df['videoid'] == video_id]['score']
         day_recall_result[int(video_id)] = float(score)
         day_video_ids.append(int(video_id))