Merge remote-tracking branch 'origin/master' into feature_20240507_supeng_supply_ab

supeng committed 11 months ago
Commit e8079d9ff0
100 changed files with 471 additions and 205 deletions
  1. +0 -0  __init__.py
  2. +2 -2  ad_arpu_update.py
  3. +2 -2  ad_ecpm_update.py
  4. +2 -2  ad_no_ad_videos_update.py
  5. +3 -3  ad_out_v1_get_offline_score_item.py
  6. +3 -3  ad_out_v1_get_offline_score_item_new.py
  7. +3 -3  ad_out_v1_get_offline_score_item_v2.py
  8. +3 -3  ad_out_v1_get_offline_score_item_v2_debug.py
  9. +3 -3  ad_out_v1_get_offline_score_user.py
 10. +3 -3  ad_out_v1_get_offline_score_user_new.py
 11. +3 -3  ad_out_v1_get_offline_score_user_v2.py
 12. +3 -3  ad_out_v1_get_offline_score_user_v2_debug.py
 13. +3 -3  ad_out_v1_get_offline_score_user_v3.py
 14. +3 -3  ad_out_v1_get_offline_score_user_v3_debug.py
 15. +2 -2  ad_out_v1_set_config.py
 16. +1 -1  ad_roi_param_update.py
 17. +2 -2  ad_threshold_auto_update.py
 18. +2 -2  ad_user_data_update_with_new_strategy.py
 19. +2 -2  ad_user_data_with_out_update.py
 20. +2 -2  ad_user_video_predict.py
 21. +2 -2  ad_users_data_update.py
 22. +2 -2  ad_users_data_update_new.py
 23. +2 -2  ad_video_data_update.py
 24. +2 -2  ad_video_data_update_with_new_strategy.py
 25. +66 -0  alg_recsys_coldstart_offlinecheck.py
 26. +27 -12  alg_recsys_rank_item_realtime_1day.py
 27. +3 -3  alg_recsys_rank_item_realtime_1h.py
 28. +3 -3  alg_recsys_rank_item_realtime_1hroot.py
 29. +4 -4  alg_recsys_rank_item_realtime_1hrootall.py
 30. +1 -1  alg_recsys_rank_item_realtime_1hrootall_task.sh
 31. +2 -2  alg_recsys_recall02_1h_region.py
 32. +2 -2  alg_recsys_recall_1h_noregion.py
 33. +2 -2  alg_recsys_recall_1h_region.py
 34. +2 -2  alg_recsys_recall_24h_noregion.py
 35. +2 -2  alg_recsys_recall_24h_region.py
 36. +2 -2  alg_recsys_recall_4h_region_trend.py
 37. +2 -2  alg_recsys_recall_aftermerge.py
 38. +2 -2  alg_recsys_recall_shield_videos.py
 39. +2 -2  alg_recsys_recall_tags_videos.py
 40. +2 -2  alg_recsys_recall_tags_videos_v2.py
 41. +2 -2  alg_recsys_recall_undertake.py
 42. +1 -1  app_rank_h.py
 43. +2 -2  bottom_videos.py
 44. +2 -2  bottom_videos_update.py
 45. +2 -2  cal_24h_score.py
 46. +2 -2  cal_hour_score.py
 47. +2 -2  check_video_limit_distribute.py
 48. +2 -2  check_video_limit_distribute_new.py
 49. +2 -2  compose_score.py
 50. +1 -1  data_expire_time_update.py
 51. +2 -2  data_monitor.py
 52. +1 -1  db_helper.py
 53. +2 -2  export_24h_vid.py
 54. +2 -2  export_hour_vid.py
 55. +1 -1  flowpool_abtest_config_update.py
 56. +2 -2  flowpool_data_update.py
 57. +26 -5  flowpool_data_update_with_level.py
 58. +2 -2  flowpool_data_update_with_level_score.py
 59. +1 -1  flowpool_level_weight_update.py
 60. +2 -2  get_data.py
 61. +2 -2  get_video_limit_list.py
 62. +2 -2  laohaokan_recommend_update.py
 63. +1 -1  log_conf.py
 64. +0 -0  my_config.py
 65. +1 -1  my_utils.py
 66. +2 -2  old_video_recall.py
 67. +2 -2  pool_predict.py
 68. +2 -2  recommend_region_data_dup.py
 69. +1 -1  recommend_region_data_status_update.py
 70. +2 -2  redis_data_monitor.py
 71. +2 -2  redis_test.py
 72. +2 -2  region_rule_rank_day.py
 73. +2 -2  region_rule_rank_h.py
 74. +2 -2  region_rule_rank_h_by24h.py
 75. +2 -2  region_rule_rank_h_new.py
 76. +2 -2  region_rule_rank_h_v2.py
 77. +2 -2  relevant_top_videos.py
 78. +2 -2  religion_class_user_update.py
 79. +2 -2  religion_class_videos_update.py
 80. +1 -1  religion_videos_update.py
 81. +2 -2  rov_data_check.py
 82. +2 -2  rov_train.py
 83. +2 -2  rule_rank_day.py
 84. +2 -2  rule_rank_day_by_30day.py
 85. +2 -2  rule_rank_h.py
 86. +2 -2  rule_rank_h_18_19.py
 87. +2 -2  rule_rank_h_by_24h.py
 88. +2 -2  rule_rank_h_by_48h.py
 89. +2 -2  rule_rank_h_new.py
 90. +2 -2  shield_videos.py
 91. +2 -2  special_mid_videos_update.py
 92. +2 -2  special_mids_update.py
 93. +2 -2  top_video_list.py
 94. +1 -1  update_in_flowpool_count_switch.py
 95. +2 -2  user_group_update.py
 96. +2 -2  videos_filter.py
 97. +2 -2  videos_similarity.py
 98. +2 -2  whole_movies_update.py
 99. +0 -0  write_redis/__init__.py
100. +164 -0  write_redis/alg_recsys_feature_01_vid2titletags_redis.py

+ 0 - 0
__init__.py


+ 2 - 2
ad_arpu_update.py

@@ -3,8 +3,8 @@ import json
 import time
 import traceback
 from threading import Timer
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()
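
Nearly every small "+2 -2" hunk in this merge is the same mechanical change, repeated across roughly ninety files: imports of utils and config are repointed at the renamed modules my_utils and my_config (the renames themselves appear below as config.py → my_config.py and utils.py → my_utils.py). A minimal sketch of how such a repo-wide rewrite could be scripted; the glob and regexes are illustrative assumptions, not part of the commit:

import re
from pathlib import Path

# old module name -> new module name, per the renames in this merge
RENAMES = {"utils": "my_utils", "config": "my_config"}

for path in Path(".").rglob("*.py"):
    text = path.read_text(encoding="utf-8")
    new_text = text
    for old, new in RENAMES.items():
        # rewrite "from utils import ..." and bare "import utils" lines only
        new_text = re.sub(rf"\bfrom {old} import\b", f"from {new} import", new_text)
        new_text = re.sub(rf"^import {old}$", f"import {new}", new_text, flags=re.M)
    if new_text != text:
        path.write_text(new_text, encoding="utf-8")
        print("rewrote", path)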

+ 2 - 2
ad_ecpm_update.py

@@ -2,9 +2,9 @@ import datetime
 import json
 import traceback
 
-from utils import request_get, send_msg_to_feishu
+from my_utils import request_get, send_msg_to_feishu
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
ad_no_ad_videos_update.py

@@ -1,8 +1,8 @@
 import datetime
 import json
 import traceback
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu_new
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu_new
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 3 - 3
ad_out_v1_get_offline_score_item.py

@@ -5,8 +5,8 @@ import datetime
 import traceback
 from threading import Timer
 from tqdm import tqdm
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()
@@ -14,7 +14,7 @@ redis_helper = RedisHelper()
 
 from feature import get_item_features
 from lr_model import LrModel
-from utils import exe_sql
+from my_utils import exe_sql
 
 def update_offline_score_item(dt):
     project = 'loghubods'

+ 3 - 3
ad_out_v1_get_offline_score_item_new.py

@@ -4,8 +4,8 @@ import datetime
 import traceback
 from threading import Timer
 from tqdm import tqdm
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 from records_process import records_process
 
@@ -15,7 +15,7 @@ redis_helper = RedisHelper()
 
 from feature import get_item_features as get_features
 from lr_model import LrModel
-from utils import exe_sql
+from my_utils import exe_sql
 
 model_key = 'ad_out_v1'
 lr_model = LrModel('model/{}.json'.format(model_key))

+ 3 - 3
ad_out_v1_get_offline_score_item_v2.py

@@ -4,8 +4,8 @@ import datetime
 import traceback
 from threading import Timer
 from tqdm import tqdm
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 from records_process import records_process
 
@@ -15,7 +15,7 @@ redis_helper = RedisHelper()
 
 from feature import get_item_features as get_features
 from lr_model import LrModel
-from utils import exe_sql
+from my_utils import exe_sql
 
 model_key = 'ad_out_v1'
 lr_model = LrModel('model/{}.json'.format(model_key))

+ 3 - 3
ad_out_v1_get_offline_score_item_v2_debug.py

@@ -5,8 +5,8 @@ import datetime
 import traceback
 from threading import Timer
 from tqdm import tqdm
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()
@@ -14,7 +14,7 @@ redis_helper = RedisHelper()
 
 from feature import get_item_features
 from lr_model import LrModel
-from utils import exe_sql
+from my_utils import exe_sql
 
 def update_offline_score_item(dt):
     project = 'loghubods'

+ 3 - 3
ad_out_v1_get_offline_score_user.py

@@ -5,8 +5,8 @@ import datetime
 import traceback
 from threading import Timer
 from tqdm import tqdm
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()
@@ -14,7 +14,7 @@ redis_helper = RedisHelper()
 
 from feature import get_user_features
 from lr_model import LrModel
-from utils import exe_sql
+from my_utils import exe_sql
 
 def update_offline_score_user(dt):
     project = 'loghubods'

+ 3 - 3
ad_out_v1_get_offline_score_user_new.py

@@ -4,8 +4,8 @@ import datetime
 import traceback
 from threading import Timer
 from tqdm import tqdm
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 from records_process import records_process
 
@@ -15,7 +15,7 @@ redis_helper = RedisHelper()
 
 from feature import get_user_features as get_features
 from lr_model import LrModel
-from utils import exe_sql
+from my_utils import exe_sql
 
 model_key = 'ad_out_v1'
 lr_model = LrModel('model/{}.json'.format(model_key))

+ 3 - 3
ad_out_v1_get_offline_score_user_v2.py

@@ -4,8 +4,8 @@ import datetime
 import traceback
 from threading import Timer
 from tqdm import tqdm
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 from records_process import records_process
 
@@ -15,7 +15,7 @@ redis_helper = RedisHelper()
 
 from feature import get_user_features as get_features
 from lr_model import LrModel
-from utils import exe_sql
+from my_utils import exe_sql
 
 model_key = 'ad_out_v1'
 lr_model = LrModel('model/{}.json'.format(model_key))

+ 3 - 3
ad_out_v1_get_offline_score_user_v2_debug.py

@@ -5,8 +5,8 @@ import datetime
 import traceback
 from threading import Timer
 from tqdm import tqdm
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()
@@ -14,7 +14,7 @@ redis_helper = RedisHelper()
 
 from feature import get_user_features
 from lr_model import LrModel
-from utils import exe_sql
+from my_utils import exe_sql
 
 def update_offline_score_user(dt):
     project = 'loghubods'

+ 3 - 3
ad_out_v1_get_offline_score_user_v3.py

@@ -4,8 +4,8 @@ import datetime
 import traceback
 from threading import Timer
 from tqdm import tqdm
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 from records_process import records_process
 
@@ -15,7 +15,7 @@ redis_helper = RedisHelper()
 
 from feature import get_user_features as get_features
 from lr_model import LrModel
-from utils import exe_sql
+from my_utils import exe_sql
 
 model_key = 'ad_out_v1'
 lr_model = LrModel('model/{}.json'.format(model_key))

+ 3 - 3
ad_out_v1_get_offline_score_user_v3_debug.py

@@ -5,8 +5,8 @@ import datetime
 import traceback
 from threading import Timer
 from tqdm import tqdm
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()
@@ -14,7 +14,7 @@ redis_helper = RedisHelper()
 
 from feature import get_user_features
 from lr_model import LrModel
-from utils import exe_sql
+from my_utils import exe_sql
 
 def update_offline_score_user(dt):
     project = 'loghubods'

+ 2 - 2
ad_out_v1_set_config.py

@@ -2,8 +2,8 @@
 import datetime
 import traceback
 from threading import Timer
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 1 - 1
ad_roi_param_update.py

@@ -1,6 +1,6 @@
 import datetime
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
ad_threshold_auto_update.py

@@ -7,8 +7,8 @@ from threading import Timer
 
 import pandas as pd
 
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu, request_get
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu, request_get
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
ad_user_data_update_with_new_strategy.py

@@ -1,8 +1,8 @@
 import datetime
 import traceback
 from threading import Timer
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
ad_user_data_with_out_update.py

@@ -1,8 +1,8 @@
 import datetime
 import traceback
 from threading import Timer
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
ad_user_video_predict.py

@@ -4,8 +4,8 @@ import traceback
 import numpy as np
 import pandas as pd
 from odps import ODPS
-from utils import data_check, get_feature_data, send_msg_to_feishu_new, RedisHelper
-from config import set_config
+from my_utils import data_check, get_feature_data, send_msg_to_feishu_new, RedisHelper
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
ad_users_data_update.py

@@ -1,8 +1,8 @@
 import datetime
 import traceback
 from threading import Timer
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
ad_users_data_update_new.py

@@ -1,8 +1,8 @@
 import datetime
 import traceback
 from threading import Timer
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
ad_video_data_update.py

@@ -2,8 +2,8 @@ import datetime
 import traceback
 import multiprocessing
 from threading import Timer
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu, send_msg_to_feishu_new
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu, send_msg_to_feishu_new
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
ad_video_data_update_with_new_strategy.py

@@ -2,8 +2,8 @@ import datetime
 import traceback
 import multiprocessing
 from threading import Timer
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu, send_msg_to_feishu_new
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu, send_msg_to_feishu_new
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 66 - 0
alg_recsys_coldstart_offlinecheck.py

@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+import time
+import traceback
+from my_config import set_config
+from log import Log
+from my_utils import execute_sql_from_odps
+from db_helper import RedisHelper
+from datetime import datetime, timedelta
+
+from alg_recsys_recall_4h_region_trend import records_process_for_list
+config_, _ = set_config()
+log_ = Log()
+redis_helper = RedisHelper()
+
+
+
+def main():
+    date_write = ""
+    while True:
+        date_cur = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        if "2024-05-10 20:3" in date_cur:
+            print("退出")
+            break
+        # if date_write == date_cur:
+        #     time.sleep(10)
+        #     continue
+
+        for level in ["1", "2", "3", "4"]:
+            key = "flow:pool:level:item:4:" + level
+            result = redis_helper.get_data_from_set(key)
+            if not result:
+                result = []
+            size = len(result)
+            log_str = "\t".join([date_cur, "4", level, str(size)])
+            # print(log_str)
+            log_.info(log_str)
+        for level in ["1", "2", "3", "4"]:
+            key = "flow:pool:level:item:0:" + level
+            result = redis_helper.get_data_from_set(key)
+            if not result:
+                result = []
+            size = len(result)
+            log_str = "\t".join([date_cur, "0", level, str(size)])
+            # print(log_str)
+            log_.info(log_str)
+        for level in ["1", "2", "3", "4"]:
+            key = "flow:pool:level:item:21:" + level
+            result = redis_helper.get_data_from_set(key)
+            if not result:
+                result = []
+            size = len(result)
+            log_str = "\t".join([date_cur, "21", level, str(size)])
+            # print(log_str)
+            log_.info(log_str)
+        time.sleep(30)
+        # date_write = datetime.now().strftime("%Y-%m-%d %H:%M")
+
+
+if __name__ == '__main__':
+    main()
+
+
+
+
+# cd /root/zhangbo/rov-offline
+# nohup python alg_recsys_coldstart_offlinecheck.py > p.log 2>&1 &
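
The three loops above are copy-pasted per app type ("4", "0", "21"). A sketch of the same check as one nested loop, in the spirit of the print_check_log helper added to flowpool_data_update_with_level.py later in this diff (the function below is illustrative and reuses this module's redis_helper and log_ globals):

def log_pool_sizes(apptype_list=("4", "0", "21"), level_list=("1", "2", "3", "4")):
    # one log line per (app, level): timestamp, app type, level, set cardinality
    date_cur = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    for app in apptype_list:
        for level in level_list:
            key = "flow:pool:level:item:" + app + ":" + level
            result = redis_helper.get_data_from_set(key) or []
            log_.info("\t".join([date_cur, app, level, str(len(result))]))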

+ 27 - 12
alg_recsys_rank_item_realtime_1day.py

@@ -3,14 +3,14 @@ import traceback
 import datetime
 from odps import ODPS
 from threading import Timer
-from utils import RedisHelper, get_data_from_odps, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, get_data_from_odps, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 from alg_recsys_recall_4h_region_trend import records_process_for_list
 import json
 from datetime import datetime, timedelta
 import sys
-from utils import execute_sql_from_odps
+from my_utils import execute_sql_from_odps
 
 
 config_, _ = set_config()
@@ -19,6 +19,7 @@ redis_helper = RedisHelper()
 
 REDIS_PREFIX = "item_rt_fea_1day_"
 
+
 def process_and_store(row):
     video_id, json_str = row
     key = REDIS_PREFIX + str(video_id)
@@ -26,6 +27,7 @@ def process_and_store(row):
     redis_helper.set_data_to_redis(key, json_str, expire_time)
     # log_.info("video写入数据key={},value={}".format(key, json_str))
 
+
 def check_data(project, table, partition) -> int:
     """检查数据是否准备好,输出数据条数"""
     odps = ODPS(
@@ -51,10 +53,12 @@ def check_data(project, table, partition) -> int:
             log_.info("表{}分区{}不存在".format(table, partition))
             data_count = 0
     except Exception as e:
-        log_.error("table:{},partition:{} no data. return data_count=0:{}".format(table, partition, e))
+        log_.error("table:{},partition:{} no data. return data_count=0:{}".format(
+            table, partition, e))
         data_count = 0
     return data_count
 
+
 def get_sql(date, previous_date_str, project):
     sql = '''
     SELECT  videoid
@@ -72,6 +76,8 @@ def get_sql(date, previous_date_str, project):
             ,CONCAT_WS(',',COLLECT_LIST(CONCAT(dt,":",2day_share_pv))) AS share_pv_list_2day
             ,CONCAT_WS(',',COLLECT_LIST(CONCAT(dt,":",3day_share_uv))) AS share_uv_list_3day
             ,CONCAT_WS(',',COLLECT_LIST(CONCAT(dt,":",3day_share_pv))) AS share_pv_list_3day
+            ,CONCAT_WS(',',COLLECT_LIST(CONCAT(dt,":",1day_sharedepth_max_avg))) AS sharedepth_max_avg_list_1day
+            ,CONCAT_WS(',',COLLECT_LIST(CONCAT(dt,":",1day_sharewidth_max_avg))) AS sharewidth_max_avg_list_1day
     FROM    (
                 SELECT  videoid
                         ,dt
@@ -89,6 +95,8 @@ def get_sql(date, previous_date_str, project):
                         ,SUM(lasttwodays_share_total) AS 2day_share_pv
                         ,SUM(lastthreedays_share) AS 3day_share_uv
                         ,SUM(lastthreedays_share_total) AS 3day_share_pv
+                        ,SUM(sharedepth_max_avg) AS 1day_sharedepth_max_avg
+                        ,SUM(sharewidth_max_avg) AS 1day_sharewidth_max_avg
                 FROM    loghubods.video_data_each_hour_dataset_24h_total_apptype
                 WHERE   dt <= '{}23'
                 AND     dt >= '{}00'
@@ -144,6 +152,14 @@ def get_sql(date, previous_date_str, project):
                 m["p_return_uv_list_1day"] = record['p_return_uv_list_1day']
             except Exception as e:
                 log_.error(e)
+            try:
+                m["sharedepth_max_avg_list_1day"] = record['sharedepth_max_avg_list_1day']
+            except Exception as e:
+                log_.error(e)
+            try:
+                m["sharewidth_max_avg_list_1day"] = record['sharewidth_max_avg_list_1day']
+            except Exception as e:
+                log_.error(e)
             json_str = json.dumps(m)
             video_list.append([video_id, json_str])
     return video_list
@@ -169,15 +185,16 @@ def h_timer_check():
     else:
         log_.info("上游数据就绪,count={},开始读取数据表".format(table_data_cnt))
         # 2 读取数据表 处理特征
-        previous_date_str = (datetime.strptime(date, "%Y%m%d") - timedelta(days=1)).strftime("%Y%m%d")
+        previous_date_str = (datetime.strptime(
+            date, "%Y%m%d") - timedelta(days=1)).strftime("%Y%m%d")
         video_list = get_sql(date, previous_date_str, project)
         # 3 写入redis
         log_.info("video的数据量:{}".format(len(video_list)))
-        records_process_for_list(video_list, process_and_store, max_size=50, num_workers=8)
-
-        redis_helper.set_data_to_redis(REDIS_PREFIX + "partition", partition, 24 * 3600)
-
+        records_process_for_list(
+            video_list, process_and_store, max_size=50, num_workers=8)
 
+        redis_helper.set_data_to_redis(
+            REDIS_PREFIX + "partition", partition, 24 * 3600)
 
 
 if __name__ == '__main__':
@@ -186,7 +203,5 @@ if __name__ == '__main__':
     log_.info("完成执行:" + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
 
 
-
-
 # cd /root/zhangbo/rov-offline
-# python alg_recsys_rank_item_realtime_1day.py 20240117 20
+# python alg_recsys_rank_item_realtime_1day.py 20240117 20
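
The two new fields follow the same encoding as the existing ones: the value stored under item_rt_fea_1day_<videoid> is a JSON object whose fields are comma-joined "dt:value" pairs. A sketch of decoding one such field back into a per-day dict; the helper is illustrative, and how the raw value is fetched depends on RedisHelper's getter, which this diff does not show:

import json

def parse_day_list(json_str, field="sharedepth_max_avg_list_1day"):
    # "20240116:1.5,20240117:2.0" -> {"20240116": 1.5, "20240117": 2.0}
    m = json.loads(json_str)
    out = {}
    for pair in m.get(field, "").split(","):
        if ":" in pair:
            dt, value = pair.split(":", 1)
            out[dt] = float(value)
    return out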

+ 3 - 3
alg_recsys_rank_item_realtime_1h.py

@@ -3,14 +3,14 @@ import traceback
 import datetime
 from odps import ODPS
 from threading import Timer
-from utils import RedisHelper, get_data_from_odps, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, get_data_from_odps, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 from alg_recsys_recall_4h_region_trend import records_process_for_list
 import json
 from datetime import datetime, timedelta
 import sys
-from utils import execute_sql_from_odps
+from my_utils import execute_sql_from_odps
 
 
 config_, _ = set_config()

+ 3 - 3
alg_recsys_rank_item_realtime_1hroot.py

@@ -3,14 +3,14 @@ import traceback
 import datetime
 from odps import ODPS
 from threading import Timer
-from utils import RedisHelper, get_data_from_odps, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, get_data_from_odps, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 from alg_recsys_recall_4h_region_trend import records_process_for_list
 import json
 from datetime import datetime, timedelta
 import sys
-from utils import execute_sql_from_odps
+from my_utils import execute_sql_from_odps
 
 
 config_, _ = set_config()

+ 4 - 4
alg_recsys_rank_item_realtime_1hrootall.py

@@ -3,14 +3,14 @@ import traceback
 import datetime
 from odps import ODPS
 from threading import Timer
-from utils import RedisHelper, get_data_from_odps, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, get_data_from_odps, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 from alg_recsys_recall_4h_region_trend import records_process_for_list
 import json
 from datetime import datetime, timedelta
 import sys
-from utils import execute_sql_from_odps
+from my_utils import execute_sql_from_odps
 
 
 config_, _ = set_config()
@@ -141,4 +141,4 @@ if __name__ == '__main__':
 
 
 # cd /root/zhangbo/rov-offline
-# python alg_recsys_rank_item_realtime_1hrootall.py 20240408 14
+# python alg_recsys_recall_01_vid2titletags_redis.py 20240408 14

+ 1 - 1
alg_recsys_rank_item_realtime_1hrootall_task.sh

@@ -17,4 +17,4 @@ elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
   echo "all done"
 fi
 
-#sh alg_recsys_rank_item_realtime_1hrootall_task.sh
+#sh alg_recsys_recall_01_vid2titletags_redis_task.sh

+ 2 - 2
alg_recsys_recall02_1h_region.py

@@ -8,9 +8,9 @@ import math
 from functools import reduce
 from odps import ODPS
 from threading import Timer
-from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
+from my_utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
     check_table_partition_exits, filter_video_status_app, send_msg_to_feishu, filter_political_videos
-from config import set_config
+from my_config import set_config
 from log import Log
 from check_video_limit_distribute import update_limit_video_score
 

+ 2 - 2
alg_recsys_recall_1h_noregion.py

@@ -7,8 +7,8 @@ from threading import Timer
 from datetime import datetime, timedelta
 from get_data import get_data_from_odps
 from db_helper import RedisHelper
-from utils import filter_video_status, check_table_partition_exits, filter_video_status_app, send_msg_to_feishu
-from config import set_config
+from my_utils import filter_video_status, check_table_partition_exits, filter_video_status_app, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
alg_recsys_recall_1h_region.py

@@ -8,9 +8,9 @@ import math
 from functools import reduce
 from odps import ODPS
 from threading import Timer
-from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
+from my_utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
     check_table_partition_exits, filter_video_status_app, send_msg_to_feishu, filter_political_videos
-from config import set_config
+from my_config import set_config
 from log import Log
 from check_video_limit_distribute import update_limit_video_score
 

+ 2 - 2
alg_recsys_recall_24h_noregion.py

@@ -7,9 +7,9 @@ from threading import Timer
 from datetime import datetime, timedelta
 from get_data import get_data_from_odps
 from db_helper import RedisHelper
-from utils import filter_video_status, check_table_partition_exits, filter_video_status_app, \
+from my_utils import filter_video_status, check_table_partition_exits, filter_video_status_app, \
     request_post, send_msg_to_feishu
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
alg_recsys_recall_24h_region.py

@@ -8,9 +8,9 @@ import math
 from functools import reduce
 from odps import ODPS
 from threading import Timer, Thread
-from utils import RedisHelper, get_data_from_odps, filter_video_status, check_table_partition_exits, \
+from my_utils import RedisHelper, get_data_from_odps, filter_video_status, check_table_partition_exits, \
     filter_video_status_app, send_msg_to_feishu
-from config import set_config
+from my_config import set_config
 from log import Log
 
 # os.environ['NUMEXPR_MAX_THREADS'] = '16'

+ 2 - 2
alg_recsys_recall_4h_region_trend.py

@@ -3,8 +3,8 @@ import traceback
 import datetime
 from odps import ODPS
 from threading import Timer
-from utils import RedisHelper, get_data_from_odps, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, get_data_from_odps, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 from queue import Queue
 from tqdm import tqdm

+ 2 - 2
alg_recsys_recall_aftermerge.py

@@ -9,9 +9,9 @@ import math
 from functools import reduce
 from odps import ODPS
 from threading import Timer, Thread
-from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
+from my_utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
     check_table_partition_exits, filter_video_status_app, send_msg_to_feishu, filter_political_videos
-from config import set_config
+from my_config import set_config
 from log import Log
 from check_video_limit_distribute import update_limit_video_score
 

+ 2 - 2
alg_recsys_recall_shield_videos.py

@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 import traceback
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import execute_sql_from_odps
+from my_utils import execute_sql_from_odps
 from db_helper import RedisHelper
 import datetime
 import json

+ 2 - 2
alg_recsys_recall_tags_videos.py

@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 import traceback
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import execute_sql_from_odps
+from my_utils import execute_sql_from_odps
 from db_helper import RedisHelper
 from datetime import datetime, timedelta
 

+ 2 - 2
alg_recsys_recall_tags_videos_v2.py

@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 import traceback
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import execute_sql_from_odps
+from my_utils import execute_sql_from_odps
 from db_helper import RedisHelper
 from datetime import datetime, timedelta
 from odps import ODPS

+ 2 - 2
alg_recsys_recall_undertake.py

@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 import traceback
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import execute_sql_from_odps
+from my_utils import execute_sql_from_odps
 from db_helper import RedisHelper
 from datetime import datetime, timedelta
 

+ 1 - 1
app_rank_h.py

@@ -8,7 +8,7 @@ from datetime import datetime as dt
 from threading import Timer
 from log import Log
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from odps import ODPS
 
 log_ = Log()

+ 2 - 2
bottom_videos.py

@@ -5,9 +5,9 @@ import traceback
 import os
 import json
 
-from utils import execute_sql_from_odps, request_post, update_video_w_h_rate
+from my_utils import execute_sql_from_odps, request_post, update_video_w_h_rate
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, env = set_config()

+ 2 - 2
bottom_videos_update.py

@@ -8,9 +8,9 @@ import traceback
 import datetime
 
 from pool_predict import get_videos_from_flow_pool
-from utils import filter_video_status
+from my_utils import filter_video_status
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
cal_24h_score.py

@@ -3,8 +3,8 @@ import sys
 import math
 import traceback
 import pandas as pd
-from utils import send_msg_to_feishu
-from config import set_config
+from my_utils import send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
cal_hour_score.py

@@ -3,8 +3,8 @@ import sys
 import traceback
 import math
 import pandas as pd
-from utils import send_msg_to_feishu
-from config import set_config
+from my_utils import send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
check_video_limit_distribute.py

@@ -1,9 +1,9 @@
 import gevent
 import datetime
 import numpy as np
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import RedisHelper
+from my_utils import RedisHelper
 
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
check_video_limit_distribute_new.py

@@ -1,9 +1,9 @@
 import gevent
 import datetime
 import numpy as np
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import RedisHelper
+from my_utils import RedisHelper
 
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
compose_score.py

@@ -2,8 +2,8 @@ import sys
 import traceback
 import pandas as pd
 from db_helper import RedisHelper
-from utils import send_msg_to_feishu
-from config import set_config
+from my_utils import send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 1 - 1
data_expire_time_update.py

@@ -1,6 +1,6 @@
 import datetime
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
data_monitor.py

@@ -3,9 +3,9 @@ import numpy as np
 import pandas as pd
 import datetime
 
-from config import set_config
+from my_config import set_config
 from rov_train import process_data, process_predict_data
-from utils import send_msg_to_feishu
+from my_utils import send_msg_to_feishu
 
 config_, env = set_config()
 

+ 1 - 1
db_helper.py

@@ -2,7 +2,7 @@
 import redis
 import psycopg2
 import pymysql
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
export_24h_vid.py

@@ -1,8 +1,8 @@
 import sys
 import traceback
 import pandas as pd
-from utils import get_data_from_odps, send_msg_to_feishu
-from config import set_config
+from my_utils import get_data_from_odps, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
export_hour_vid.py

@@ -1,8 +1,8 @@
 import sys
 import traceback
 import pandas as pd
-from utils import get_data_from_odps, send_msg_to_feishu
-from config import set_config
+from my_utils import get_data_from_odps, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 1 - 1
flowpool_abtest_config_update.py

@@ -4,7 +4,7 @@
 # @Time: 2023/9/23 13:31
 # @Software: PyCharm
 import json
-from config import set_config
+from my_config import set_config
 from log import Log
 from db_helper import RedisHelper
 

+ 2 - 2
flowpool_data_update.py

@@ -5,8 +5,8 @@ import os
 import traceback
 import random
 
-from config import set_config
-from utils import request_post, filter_video_status, send_msg_to_feishu, filter_video_status_app, \
+from my_config import set_config
+from my_utils import request_post, filter_video_status, send_msg_to_feishu, filter_video_status_app, \
     filter_political_videos
 from log import Log
 from db_helper import RedisHelper

+ 26 - 5
flowpool_data_update_with_level.py

@@ -6,8 +6,8 @@ import traceback
 import random
 import json
 
-from config import set_config
-from utils import request_post, filter_video_status, send_msg_to_feishu, filter_video_status_app, \
+from my_config import set_config
+from my_utils import request_post, filter_video_status, send_msg_to_feishu, filter_video_status_app, \
     filter_political_videos
 from log import Log
 from db_helper import RedisHelper
@@ -30,9 +30,7 @@ def get_videos_from_flow_pool(app_type, size=1000):
     request_data = {'appType': app_type, 'batchFlag': batch_flag, 'size': size, 'isSupply': 0}
     videos = []
     retry = 0
-    print("zhangbo1")
     while True:
-        print("zhangbo2")
         print(config_.GET_VIDEOS_FROM_POOL_URL)
         result = request_post(request_url=config_.GET_VIDEOS_FROM_POOL_URL, request_data=request_data)
         if result is None:
@@ -87,7 +85,7 @@ def get_videos_remain_view_count(video_info_list):
             if distribute_count > 0:
                 # update the distribute count in the local record
                 key_name = f"{config_.LOCAL_DISTRIBUTE_COUNT_PREFIX}{item['videoId']}:{item['flowPool']}"
-                redis_helper.set_data_to_redis(key_name=key_name, value=distribute_count, expire_time=15 * 60)
+                redis_helper.set_data_to_redis(key_name=key_name, value=distribute_count, expire_time=25 * 60)
             else:
                 # delete the local record
                 key_name = f"{config_.LOCAL_DISTRIBUTE_COUNT_PREFIX}{item['videoId']}:{item['flowPool']}"
@@ -250,6 +248,12 @@ def get_flow_pool_data(app_type, video_info_list, flow_pool_id_list):
             # write to redis
             if videos:
                 redis_helper.add_data_with_set(key_name=flow_pool_key_name, values=videos, expire_time=24 * 3600)
+                result = redis_helper.get_data_from_set(flow_pool_key_name)
+                if not result:
+                    result = []
+                size = len(result)
+                log_.info(f'write ok, key={flow_pool_key_name}:{size}')
+

         # delete any level keys that no longer exist
         for i in range(1, 7):
@@ -258,6 +262,7 @@ def get_flow_pool_data(app_type, video_info_list, flow_pool_id_list):
                 # if the key already exists, delete it
                 if redis_helper.key_exists(flow_pool_key_name):
                     redis_helper.del_keys(flow_pool_key_name)
+                    log_.info(f'deleted stale key={flow_pool_key_name}')
 
         log_.info('data to redis finished!')
 
@@ -294,6 +299,20 @@ def get_data_from_odps(project, sql):
         data_df = None
     return data_df
 
+def print_check_log(s):
+    log_.info(s)
+    apptype_list = ["0", "4", "5", "21", "3", "6"]
+    level_list = ["1", "2", "3", "4"]
+    date_cur = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    for app in apptype_list:
+        for level in level_list:
+            key =  "flow:pool:level:item:" + app + ":" + level
+            result = redis_helper.get_data_from_set(key)
+            if not result:
+                result = []
+            size = len(result)
+            log_str = "\t".join([date_cur, app, level, str(size)])
+            log_.info(log_str)
 
 if __name__ == '__main__':
     st_time = time.time()
@@ -323,8 +342,10 @@ if __name__ == '__main__':
 
     # update the remaining distribute counts
     log_.info(f"video_info_list count = {len(video_info_list)}")
+    print_check_log("-------------------- pre-update check --------------------")
     get_videos_remain_view_count(video_info_list)
     log_.info('flow pool predict end...')
     log_.info(f"expend time = {(time.time() - st_time) * 1000}ms")
+    print_check_log("-------------------- post-update check --------------------")

# python flowpool_data_update_with_level.py    in the test environment this must be run manually for data to show up
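
Besides the extra logging, two behavioral changes ride along in this file: the local distribute-count TTL grows from 15 to 25 minutes, and every flow-pool set write is now followed by a read-back of the set's cardinality. A minimal sketch of folding that write-then-verify pattern into one helper, reusing the module's redis_helper and log_ globals (the helper name is an illustration, not part of the repo):

def add_set_and_verify(key_name, values, expire_time=24 * 3600):
    # write the members, then read the set back so its size lands in the logs
    redis_helper.add_data_with_set(key_name=key_name, values=values, expire_time=expire_time)
    size = len(redis_helper.get_data_from_set(key_name) or [])
    log_.info(f'write ok, key={key_name}:{size}')
    return size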

+ 2 - 2
flowpool_data_update_with_level_score.py

@@ -2,8 +2,8 @@ import time
 import traceback
 import json
 
-from config import set_config
-from utils import request_post, filter_video_status, send_msg_to_feishu, filter_video_status_app, \
+from my_config import set_config
+from my_utils import request_post, filter_video_status, send_msg_to_feishu, filter_video_status_app, \
     filter_political_videos
 from log import Log
 from db_helper import RedisHelper

+ 1 - 1
flowpool_level_weight_update.py

@@ -1,6 +1,6 @@
 import json
 
-from config import set_config
+from my_config import set_config
 from log import Log
 from db_helper import RedisHelper
 

+ 2 - 2
get_data.py

@@ -3,8 +3,8 @@ import datetime
 import process_feature
 
 from datetime import datetime as dt
-from config import set_config
-from utils import get_data_from_odps, write_to_pickle
+from my_config import set_config
+from my_utils import get_data_from_odps, write_to_pickle
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
get_video_limit_list.py

@@ -1,8 +1,8 @@
 import datetime
 import traceback
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import request_post, RedisHelper
+from my_utils import request_post, RedisHelper
 
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
laohaokan_recommend_update.py

@@ -2,8 +2,8 @@ import datetime
 import traceback
 import gevent
 from db_helper import RedisHelper
-from utils import send_msg_to_feishu
-from config import set_config
+from my_utils import send_msg_to_feishu
+from my_config import set_config
 from log import Log
 
 config_, env = set_config()

+ 1 - 1
log_conf.py

@@ -3,7 +3,7 @@ import logging
 import aliyun
 import os
 import time
-from config import set_config
+from my_config import set_config
 config_, _ = set_config()
 
 # 本地日志存储路径

+ 0 - 0
config.py → my_config.py


+ 1 - 1
utils.py → my_utils.py

@@ -7,7 +7,7 @@ import traceback
 import pandas as pd
 
 from odps import ODPS
-from config import set_config
+from my_config import set_config
 from db_helper import HologresHelper, MysqlHelper, RedisHelper
 from log import Log
 from collections import defaultdict

+ 2 - 2
old_video_recall.py

@@ -5,9 +5,9 @@
 # @Software: PyCharm
 import pandas as pd
 from datetime import datetime
-from utils import get_data_from_odps, filter_video_status
+from my_utils import get_data_from_odps, filter_video_status
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, env = set_config()

+ 2 - 2
pool_predict.py

@@ -6,8 +6,8 @@ import os
 import traceback
 import random
 
-from config import set_config
-from utils import request_post, filter_video_status, send_msg_to_feishu, filter_video_status_app, \
+from my_config import set_config
+from my_utils import request_post, filter_video_status, send_msg_to_feishu, filter_video_status_app, \
     filter_political_videos
 from log import Log
 from db_helper import RedisHelper

+ 2 - 2
recommend_region_data_dup.py

@@ -11,9 +11,9 @@ import math
 from functools import reduce
 from odps import ODPS
 from threading import Timer, Thread
-from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
+from my_utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
     check_table_partition_exits, filter_video_status_app, send_msg_to_feishu, filter_political_videos
-from config import set_config
+from my_config import set_config
 from log import Log
 from check_video_limit_distribute import update_limit_video_score
 

+ 1 - 1
recommend_region_data_status_update.py

@@ -1,5 +1,5 @@
 import datetime
-from config import set_config
+from my_config import set_config
 from log import Log
 from db_helper import RedisHelper
 

+ 2 - 2
redis_data_monitor.py

@@ -1,6 +1,6 @@
 import datetime
-from config import set_config
-from utils import RedisHelper, send_msg_to_feishu
+from my_config import set_config
+from my_utils import RedisHelper, send_msg_to_feishu
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
redis_test.py

@@ -1,5 +1,5 @@
-from utils import RedisHelper
-from config import set_config
+from my_utils import RedisHelper
+from my_config import set_config
 from log import Log
 import sys
 sys.path.append("zhangbo/")

+ 2 - 2
region_rule_rank_day.py

@@ -9,8 +9,8 @@ import pandas as pd
 import math
 from odps import ODPS
 from threading import Timer
-from utils import RedisHelper, get_data_from_odps, filter_video_status
-from config import set_config
+from my_utils import RedisHelper, get_data_from_odps, filter_video_status
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
region_rule_rank_h.py

@@ -17,9 +17,9 @@ import math
 from functools import reduce
 from odps import ODPS
 from threading import Timer, Thread
-from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
+from my_utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
     check_table_partition_exits, filter_video_status_app, send_msg_to_feishu, filter_political_videos
-from config import set_config
+from my_config import set_config
 from log import Log
 from check_video_limit_distribute import update_limit_video_score
 

+ 2 - 2
region_rule_rank_h_by24h.py

@@ -15,9 +15,9 @@ import math
 from functools import reduce
 from odps import ODPS
 from threading import Timer, Thread
-from utils import RedisHelper, get_data_from_odps, filter_video_status, check_table_partition_exits, \
+from my_utils import RedisHelper, get_data_from_odps, filter_video_status, check_table_partition_exits, \
     filter_video_status_app, send_msg_to_feishu
-from config import set_config
+from my_config import set_config
 from log import Log
 
 # os.environ['NUMEXPR_MAX_THREADS'] = '16'

+ 2 - 2
region_rule_rank_h_new.py

@@ -16,9 +16,9 @@ import math
 from functools import reduce
 from odps import ODPS
 from threading import Timer, Thread
-from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
+from my_utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
     check_table_partition_exits, filter_video_status_app, send_msg_to_feishu, filter_political_videos
-from config import set_config
+from my_config import set_config
 from log import Log
 from check_video_limit_distribute_new import update_limit_video_score
 

+ 2 - 2
region_rule_rank_h_v2.py

@@ -17,9 +17,9 @@ import math
 from functools import reduce
 from odps import ODPS
 from threading import Timer, Thread
-from utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
+from my_utils import MysqlHelper, RedisHelper, get_data_from_odps, filter_video_status, filter_shield_video, \
     check_table_partition_exits, filter_video_status_app, send_msg_to_feishu, filter_political_videos
-from config import set_config
+from my_config import set_config
 from log import Log
 from check_video_limit_distribute import update_limit_video_score
 

+ 2 - 2
relevant_top_videos.py

@@ -1,9 +1,9 @@
 import pandas as pd
 import json
 import time
-from config import set_config
+from my_config import set_config
 from db_helper import RedisHelper
-from utils import filter_video_status
+from my_utils import filter_video_status
 
 config_, _ = set_config()
 

+ 2 - 2
religion_class_user_update.py

@@ -6,9 +6,9 @@ import pandas as pd
 from odps import ODPS
 from threading import Timer
 from get_data import get_data_from_odps
-from utils import send_msg_to_feishu
+from my_utils import send_msg_to_feishu
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, env = set_config()

+ 2 - 2
religion_class_videos_update.py

@@ -5,9 +5,9 @@ import pandas as pd
 from odps import ODPS
 from threading import Timer
 from get_data import get_data_from_odps
-from utils import send_msg_to_feishu
+from my_utils import send_msg_to_feishu
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, env = set_config()

+ 1 - 1
religion_videos_update.py

@@ -4,7 +4,7 @@ from odps import ODPS
 from threading import Timer
 from get_data import get_data_from_odps
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, env = set_config()

+ 2 - 2
rov_data_check.py

@@ -5,9 +5,9 @@ import traceback
 from odps import ODPS
 from datetime import datetime as dt
 from threading import Timer
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import send_msg_to_feishu
+from my_utils import send_msg_to_feishu
 
 config_, env = set_config()
 log_ = Log()

+ 2 - 2
rov_train.py

@@ -8,8 +8,8 @@ import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percentage_error
 
-from config import set_config
-from utils import read_from_pickle, write_to_pickle, data_normalization, \
+from my_config import set_config
+from my_utils import read_from_pickle, write_to_pickle, data_normalization, \
     request_post, filter_video_status, update_video_w_h_rate, filter_video_status_app, filter_shield_video
 from log import Log
 from db_helper import RedisHelper, MysqlHelper

+ 2 - 2
rule_rank_day.py

@@ -2,9 +2,9 @@ import pandas as pd
 from odps import ODPS
 from datetime import datetime, timedelta
 from threading import Timer
-from utils import get_data_from_odps, filter_video_status
+from my_utils import get_data_from_odps, filter_video_status
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
rule_rank_day_by_30day.py

@@ -6,8 +6,8 @@ from threading import Timer
 from datetime import datetime, timedelta
 from get_data import get_data_from_odps
 from db_helper import RedisHelper
-from utils import filter_video_status, check_table_partition_exits
-from config import set_config
+from my_utils import filter_video_status, check_table_partition_exits
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
rule_rank_h.py

@@ -4,9 +4,9 @@ import math
 from odps import ODPS
 from threading import Timer
 from get_data import get_data_from_odps
-from utils import filter_video_status
+from my_utils import filter_video_status
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
rule_rank_h_18_19.py

@@ -13,9 +13,9 @@ from odps import ODPS
 from threading import Timer
 from get_data import get_data_from_odps
 from db_helper import RedisHelper, MysqlHelper
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import filter_video_status
+from my_utils import filter_video_status
 
 config_, env = set_config()
 log_ = Log()

+ 2 - 2
rule_rank_h_by_24h.py

@@ -10,9 +10,9 @@ from threading import Timer
 from datetime import datetime, timedelta
 from get_data import get_data_from_odps
 from db_helper import RedisHelper
-from utils import filter_video_status, check_table_partition_exits, filter_video_status_app, \
+from my_utils import filter_video_status, check_table_partition_exits, filter_video_status_app, \
     request_post, send_msg_to_feishu
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
rule_rank_h_by_48h.py

@@ -12,8 +12,8 @@ from threading import Timer
 from datetime import datetime, timedelta
 from get_data import get_data_from_odps
 from db_helper import RedisHelper
-from utils import filter_video_status, check_table_partition_exits
-from config import set_config
+from my_utils import filter_video_status, check_table_partition_exits
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
rule_rank_h_new.py

@@ -7,8 +7,8 @@ from threading import Timer
 from datetime import datetime, timedelta
 from get_data import get_data_from_odps
 from db_helper import RedisHelper
-from utils import filter_video_status, check_table_partition_exits, filter_video_status_app, send_msg_to_feishu
-from config import set_config
+from my_utils import filter_video_status, check_table_partition_exits, filter_video_status_app, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
shield_videos.py

@@ -1,9 +1,9 @@
 import datetime
 import traceback
 import datetime
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import execute_sql_from_odps
+from my_utils import execute_sql_from_odps
 from db_helper import RedisHelper
 
 config_, _ = set_config()

+ 2 - 2
special_mid_videos_update.py

@@ -7,9 +7,9 @@ from odps import ODPS
 from threading import Timer
 from get_data import get_data_from_odps
 from db_helper import RedisHelper, MysqlHelper
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import request_post
+from my_utils import request_post
 
 config_, env = set_config()
 log_ = Log()

+ 2 - 2
special_mids_update.py

@@ -7,9 +7,9 @@ from odps import ODPS
 from threading import Timer
 from get_data import get_data_from_odps
 from db_helper import RedisHelper, MysqlHelper
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import filter_video_status_with_applet_rec
+from my_utils import filter_video_status_with_applet_rec
 
 config_, env = set_config()
 log_ = Log()

+ 2 - 2
top_video_list.py

@@ -1,7 +1,7 @@
 import traceback
-from utils import request_post, filter_video_status, filter_video_status_app
+from my_utils import request_post, filter_video_status, filter_video_status_app
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 1 - 1
update_in_flowpool_count_switch.py

@@ -1,4 +1,4 @@
-from config import set_config
+from my_config import set_config
 from log import Log
 from db_helper import RedisHelper
 

+ 2 - 2
user_group_update.py

@@ -5,8 +5,8 @@ import time
 import traceback
 import gevent
 from threading import Timer
-from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
-from config import set_config
+from my_utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
+from my_config import set_config
 from log import Log
 config_, _ = set_config()
 log_ = Log()

+ 2 - 2
videos_filter.py

@@ -9,9 +9,9 @@ import pandas as pd
 from datetime import date, timedelta, datetime
 
 from region_rule_rank_h import region_code
-from utils import filter_video_status, send_msg_to_feishu, filter_video_status_app
+from my_utils import filter_video_status, send_msg_to_feishu, filter_video_status_app
 from db_helper import RedisHelper, MysqlHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, env = set_config()

+ 2 - 2
videos_similarity.py

@@ -4,9 +4,9 @@ import datetime
 
 import pandas as pd
 from odps import ODPS
-from utils import filter_video_status
+from my_utils import filter_video_status
 from db_helper import RedisHelper
-from config import set_config
+from my_config import set_config
 from log import Log
 
 config_, _ = set_config()

+ 2 - 2
whole_movies_update.py

@@ -7,9 +7,9 @@ from odps import ODPS
 from threading import Timer
 from get_data import get_data_from_odps
 from db_helper import RedisHelper, MysqlHelper
-from config import set_config
+from my_config import set_config
 from log import Log
-from utils import filter_video_status_with_applet_rec
+from my_utils import filter_video_status_with_applet_rec
 
 config_, env = set_config()
 log_ = Log()

+ 0 - 0
write_redis/__init__.py


+ 164 - 0
write_redis/alg_recsys_feature_01_vid2titletags_redis.py

@@ -0,0 +1,164 @@
+# -*- coding: utf-8 -*-
+import os
+import sys
+root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+if root_dir not in sys.path:
+    sys.path.append(root_dir)
+    print("******** sys.path ********")
+    print(sys.path)
+
+from odps import ODPS
+from threading import Timer
+import threading
+from my_utils import RedisHelper, execute_sql_from_odps
+from my_config import set_config
+from log import Log
+import json
+from datetime import datetime
+from queue import Queue
+from tqdm import tqdm
+
+
+
+
+config_, _ = set_config()
+log_ = Log()
+redis_helper = RedisHelper()
+
+REDIS_PREFIX = "redis:vid_title_tags:"
+EXPIRE_TIME = 3 * 3600
+
+def worker(queue, executor):
+    while True:
+        row = queue.get()
+        if row is None:  # stop signal
+            queue.task_done()
+            break
+        executor(row)
+        queue.task_done()
+def records_process_for_list(records, executor, max_size=50, num_workers=10):
+    # create a thread-safe queue
+    queue = Queue(maxsize=max_size)  # adjust maxsize to bound memory usage
+    # size of the worker pool
+    num_workers = num_workers
+    # start the worker threads
+    threads = []
+    for _ in range(num_workers):
+        t = threading.Thread(target=worker, args=(queue, executor))
+        t.start()
+        threads.append(t)
+    # read the records and feed them into the queue
+    for row in tqdm(records):
+        queue.put(row)
+    # send one stop signal per worker
+    for _ in range(num_workers):
+        queue.put(None)
+    # wait for all queued tasks to finish
+    queue.join()
+    # wait for all worker threads to exit
+    for t in threads:
+        t.join()
+def process_and_store(row):
+    video_id, json_str = row
+    key = REDIS_PREFIX + str(video_id)
+    expire_time = EXPIRE_TIME
+    redis_helper.set_data_to_redis(key, json_str, expire_time)
+
+def check_data(project, table,  date, hour) -> int:
+    """检查数据是否准备好,输出数据条数"""
+    odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project=project,
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
+        connect_timeout=3000,
+        read_timeout=500000,
+        pool_maxsize=1000,
+        pool_connections=1000
+    )
+    try:
+        t = odps.get_table(name=table)
+        log_.info(f"检查分区是否存在-【 dt={date} hh={hour}】")
+        check_res = t.exist_partition(partition_spec=f'dt={date},hh={hour}')
+        if check_res:
+            sql = f'select * from {project}.{table} where dt = {date} and hh = {hour}'
+            log_.info(sql)
+            with odps.execute_sql(sql=sql).open_reader() as reader:
+                data_count = reader.count
+        else:
+            log_.info("表{}分区{}/{}不存在".format(table, date, hour))
+            data_count = 0
+    except Exception as e:
+        log_.error("table:{},date:{},hour:{} no data. return data_count=0,报错原因是:{}".format(table, date, hour, e))
+        data_count = 0
+    return data_count
+
+def get_sql(project, table, date, hour):
+    sql = '''
+    SELECT  vid
+            ,tags
+            ,title
+    FROM    {}.{}
+    WHERE   dt = '{}'
+    and     hh = '{}'
+    '''.format(
+        project, table, date, hour
+    )
+    print("sql:" + sql)
+    records = execute_sql_from_odps(project=project, sql=sql)
+    video_list = []
+    with records.open_reader() as reader:
+        for record in reader:
+            video_id = record['vid']
+            m = dict()
+            try:
+                m["tags"] = record['tags']
+            except Exception as e:
+                log_.error(e)
+            try:
+                m["title"] = record['title']
+            except Exception as e:
+                log_.error(e)
+
+            json_str = json.dumps(m)
+            video_list.append([video_id, json_str])
+    return video_list
+
+
+def main():
+    try:
+        date = sys.argv[1]
+        hour = sys.argv[2]
+    except Exception as e:
+        date = datetime.now().strftime('%Y%m%d')
+        hour = datetime.now().strftime('%H')  # keep as a zero-padded string so the "00" check below matches
+        log_.info("no arguments given, falling back to system time: {}".format(e))
+    log_.info("time parameters - date: {}, hour: {}".format(date, str(hour)))
+    if hour in ["00"]:
+        log_.info(f"hour={hour}: skipping, returning immediately.")
+        return
+    # 1 check whether the upstream table has finished producing
+    project = "loghubods"
+    table = "alg_recsys_feature_title_tags"
+    table_data_cnt = check_data(project, table, date, hour)
+    if table_data_cnt == 0:
+        log_.info("upstream data {} not ready for {}/{}, waiting...".format(table, date, hour))
+        Timer(60, main).start()
+    else:
+        log_.info("upstream data ready, count={}, reading source table".format(table_data_cnt))
+        # 2 read the table and build features
+        video_list = get_sql(project, table, date, hour)
+        # 3 write to redis
+        log_.info("video count: {}".format(len(video_list)))
+        records_process_for_list(video_list, process_and_store, max_size=100, num_workers=8)
+
+if __name__ == '__main__':
+    log_.info("started: " + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+    main()
+    log_.info("finished: " + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+
+
+
+# previous_date_str = (datetime.strptime(date, "%Y%m%d") - timedelta(days=1)).strftime("%Y%m%d")
+# cd /root/zhangbo/rov-offline
+# python alg_recsys_feature_01_vid2titletags_redis.py 20240508 14
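
For consumers, each key this job writes is redis:vid_title_tags:<vid>, holding a JSON object with "title" and "tags" fields and a 3-hour TTL. A sketch of a reader; the getter name is an assumption about RedisHelper's API, since this file only uses it for writes:

import json

def get_title_tags(vid):
    key = "redis:vid_title_tags:" + str(vid)
    raw = redis_helper.get_data_from_redis(key)  # assumed RedisHelper getter
    return json.loads(raw) if raw else None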

Some files were not shown because too many files changed in this diff.