liqian 3 years ago
parent
commit
ac75d7c0e1
1 changed files with 57 additions and 8 deletions
  1. 57 8
      rule_rank_h_18_19.py

+ 57 - 8
rule_rank_h_18_19.py

@@ -4,17 +4,20 @@
 # @Time: 2022/4/21 下午4:31
 # @Software: PyCharm
 
+import time
 import datetime
 import pandas as pd
 import math
+import random
 from odps import ODPS
 from threading import Timer
 from get_data import get_data_from_odps
-from db_helper import RedisHelper
+from db_helper import RedisHelper, MysqlHelper
 from config import set_config
 from log import Log
+from utils import filter_video_status
 
-config_, _ = set_config()
+config_, env = set_config()
 log_ = Log()
 
 features = [
@@ -186,11 +189,57 @@ def h_timer_check(app_type):
         Timer(60, h_timer_check).start()
 
 
-if __name__ == '__main__':
-    # df1 = get_feature_data()
-    # res = cal_score(df=df1)
-    # video_rank(df=res, now_date=datetime.datetime.today())
-    # rank_by_h()
-    app_type_list = [18, 19]
def predict(app_type_list):
    """Start the hourly rank-check scheduler for every requested app type.

    :param app_type_list: iterable of app type ids to schedule
    :return: None
    """
    for app_type_id in app_type_list:
        h_timer_check(app_type=app_type_id)
+
+
def predict_test(app_type_list):
    """Generate fake prediction data for dev/test environments.

    For each app type: sample up to 300 recently published videos, assign each
    a uniform random score in [0, 100), dump the ranking to a CSV file and
    write the scores into the app-type-specific redis sorted set.

    :param app_type_list: iterable of app type ids to generate data for
    :return: None
    """
    now_date = datetime.datetime.today()
    now_h = datetime.datetime.now().hour
    log_.info(f"now_date = {datetime.datetime.strftime(now_date, '%Y%m%d%H')}, now_h = {now_h}")
    # Fetch the 40000 most recently published videos from the test environment
    sql = "SELECT id FROM wx_video ORDER BY id DESC LIMIT 40000;"
    mysql_helper = MysqlHelper(mysql_info=config_.MYSQL_INFO)
    data = mysql_helper.get_data(sql=sql)
    video_ids = [video[0] for video in data]
    # Drop videos whose status makes them unrecommendable
    filtered_videos = filter_video_status(video_ids)
    log_.info('filtered_videos count = {}'.format(len(filtered_videos)))

    # Hoisted out of the loop: one redis connection serves all app types
    # (the original re-created a RedisHelper per iteration).
    redis_helper = RedisHelper()
    for app_type in app_type_list:
        log_.info(f"app_type = {app_type}")
        # Bug fix: random.sample raises ValueError when the population is
        # smaller than the requested sample size — cap at the available count.
        sample_size = min(300, len(filtered_videos))
        videos_temp = random.sample(filtered_videos, sample_size)
        redis_data_temp = {}
        csv_data_temp = []
        for video_id in videos_temp:
            score = random.uniform(0, 100)
            redis_data_temp[video_id] = score
            csv_data_temp.append({'video_id': video_id, 'rov_score': score})
        # Persist the ranking as a CSV snapshot, sorted best-first
        score_df = pd.DataFrame(data=csv_data_temp, columns=['video_id', 'rov_score'])
        score_df = score_df.sort_values(by=['rov_score'], ascending=False)
        score_filename = f"score_{app_type}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.csv"
        # NOTE(review): assumes ./data/ already exists — to_csv will fail otherwise; confirm
        score_df.to_csv(f'./data/{score_filename}', index=False)

        # Write scores to the per-app-type, per-hour recall key in redis
        key_name = \
            f"{config_.RECALL_KEY_NAME_PREFIX_APP_TYPE}{app_type}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
        redis_helper.add_data_with_zset(key_name=key_name, data=redis_data_temp)
        log_.info('data to redis finished!')
+
+
if __name__ == '__main__':
    # App types to rank: values come from config, not hardcoded ids
    app_type_list = [config_.APP_TYPE['LAO_HAO_KAN_VIDEO'], config_.APP_TYPE['ZUI_JING_QI']]
    log_.info(f'appType: {app_type_list} predict start...')
    predict_start = time.time()
    # dev/test environments get randomly generated data; pre/pro run the
    # real hourly prediction scheduler
    if env in ['dev', 'test']:
        predict_test(app_type_list=app_type_list)
    elif env in ['pre', 'pro']:
        predict(app_type_list=app_type_list)
    else:
        log_.error('env error')
    predict_end = time.time()
    # Bug fix: the end log hardcoded "[18, 19]"; report the actual list so
    # start/end log lines stay consistent when config changes
    log_.info(f'appType: {app_type_list} predict end, '
              f'execute time = {(predict_end - predict_start) * 1000}ms')
+