浏览代码

Merge branch 'master' into test

caida 1 年之前
父节点
当前提交
a604d9d2c0

+ 55 - 13
ad_out_v1_get_offline_score_item.py

@@ -1,8 +1,6 @@
 #coding utf-8
-from tqdm import tqdm
 import sys
-import json
-
+import datetime
 import traceback
 from threading import Timer
 from tqdm import tqdm
@@ -17,10 +15,8 @@ from feature import get_item_features
 from lr_model import LrModel
 from utils import exe_sql
 
-
-if __name__ == "__main__":
+def update_offline_score_item(dt):
     project = 'loghubods'
-    datetime = sys.argv[1]
     sql = """
 --odps sql 
 --********************************************************************--
@@ -127,7 +123,7 @@ mid AS u_id
 ,round(if(i_ros_3month > 10.0, 10.0, i_ros_3month) / 10.0, 6) as i_ros_3month
 from
 loghubods.user_video_features_data_final
-where dt='{datetime}'
+where dt='{dt}'
 and ad_ornot = '0'
 and apptype != '13'
 ), candidate_user as (
@@ -223,8 +219,8 @@ and apptype != '13'
 SELECT
 *
 from candidate_item
-    """.format(datetime=datetime)
-    # print(sql)
+    """.format(dt=dt)
+    # log_.info(sql)
     data = exe_sql(project, sql)
     print('sql done')
     # data.to_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t')
@@ -232,8 +228,6 @@ from candidate_item
     model_key = 'ad_out_v1'
     lr_model = LrModel('model/{}.json'.format(model_key))
     item_h_dict = {}
-    k_col = 'i_id'
-    dt = datetime
     key_name_prefix = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM}{model_key}"
     print(key_name_prefix)
     mean_item_h = 0.0
@@ -251,8 +245,8 @@ from candidate_item
             count_item_h += 1
             # print(item_features)
             # print(item_h)
-    mean_item_h = mean_item_h / count_item_h 
-    item_h_dict['mean'] = mean_item_h 
+    mean_item_h = mean_item_h / count_item_h
+    item_h_dict['mean'] = mean_item_h
     print(mean_item_h)
     print(count_item_h)
     k = 'mean'
@@ -260,3 +254,51 @@ from candidate_item
     # with open('{}.json'.format(key_name_prefix), 'w') as fout:
     #     json.dump(item_h_dict, fout, indent=2, ensure_ascii=False, sort_keys=True)
 
+
+def timer_check(dt):
+    try:
+        project = config_.ad_model_data['ad_out_v1'].get('project')
+        table = config_.ad_model_data['ad_out_v1'].get('table')
+        now_date = datetime.datetime.today()
+        yesterday_date = now_date - datetime.timedelta(days=1)
+        now_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+        yesterday_dt = datetime.datetime.strftime(yesterday_date, '%Y%m%d')
+        log_.info(f"now_dt: {now_dt}")
+        if dt is not None:
+            yesterday_dt = dt
+        log_.info(f"update_dt: {yesterday_dt}")
+        now_min = datetime.datetime.now().minute
+        # 查看当前更新的数据是否已准备好
+        data_count = data_check(project=project, table=table, dt=yesterday_dt)
+        if data_count > 0:
+            log_.info('update_offline_score_item start! {}'.format(data_count))
+            # 数据准备好,进行更新
+            update_offline_score_item(dt=yesterday_dt)
+            log_.info('update_offline_score_item end!')
+        else:
+            # 数据没准备好,5分钟后重新检查
+            wait_seconds = 5 * 60
+            log_.info('data not ready, wait {}s'.format(wait_seconds))
+            Timer(wait_seconds, timer_check).start()
+
+    except Exception as e:
+        log_.error(f"用户广告跳出率预估离线item数据更新失败 exception: {e}, traceback: {traceback.format_exc()}")
+        send_msg_to_feishu(
+            webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+            key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+            msg_text=f"rov-offline{config_.ENV_TEXT} - 用户广告跳出率预估离线item数据更新失败\n"
+                     f"exception: {e}\n"
+                     f"traceback: {traceback.format_exc()}"
+        )
+
+
+if __name__ == "__main__":
+    dt = None
+    if len(sys.argv) > 1:
+        dt = sys.argv[1]
+        log_.info('## 手动更新:{}'.format(dt))
+    else:
+        log_.info('## 自动更新')
+    timer_check(dt)
+
+

+ 9 - 2
ad_out_v1_get_offline_score_item.sh

@@ -1,3 +1,10 @@
-
-/root/anaconda3/bin/python ad_out_v1_get_offline_score_item.py $*
+source /etc/profile
+echo $ROV_OFFLINE_ENV
+if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
+    cd /data2/rov-offline &&
+    /root/anaconda3/bin/python /data2/rov-offline/ad_out_v1_get_offline_score_item.py $*
+elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
+    cd /data/rov-offline &&
+    /root/anaconda3/bin/python /data/rov-offline/ad_out_v1_get_offline_score_item.py $*
+fi
 

+ 53 - 8
ad_out_v1_get_offline_score_user.py

@@ -1,8 +1,6 @@
 #coding utf-8
-from tqdm import tqdm
 import sys
-import json
-
+import datetime
 import traceback
 from threading import Timer
 from tqdm import tqdm
@@ -17,9 +15,8 @@ from feature import get_user_features
 from lr_model import LrModel
 from utils import exe_sql
 
-if __name__ == "__main__":
+def update_offline_score_user(dt):
     project = 'loghubods'
-    datetime = sys.argv[1]
     sql = """
 --odps sql 
 --********************************************************************--
@@ -126,7 +123,7 @@ mid AS u_id
 ,round(if(i_ros_3month > 10.0, 10.0, i_ros_3month) / 10.0, 6) as i_ros_3month
 from
 loghubods.user_video_features_data_final
-where dt='{datetime}'
+where dt='{dt}'
 and ad_ornot = '0'
 and apptype != '13'
 ), candidate_user as (
@@ -222,8 +219,8 @@ and apptype != '13'
 SELECT
 *
 from candidate_user
-    """.format(datetime=datetime)
-    # print(sql)
+    """.format(dt=dt)
+    # log_.info(sql)
     data = exe_sql(project, sql)
     print('sql done')
     # data.to_csv('./data/ad_out_sample_v2_item.{datetime}'.format(datetime=datetime), sep='\t')
@@ -259,3 +256,51 @@ from candidate_user
     # with open('{}.json'.format(key_name_prefix), 'w') as fout:
     #     json.dump(user_h_dict, fout, indent=2, ensure_ascii=False, sort_keys=True)
 
+
+def timer_check(dt):
+    try:
+        project = config_.ad_model_data['ad_out_v1'].get('project')
+        table = config_.ad_model_data['ad_out_v1'].get('table')
+        now_date = datetime.datetime.today()
+        yesterday_date = now_date - datetime.timedelta(days=1)
+        now_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+        yesterday_dt = datetime.datetime.strftime(yesterday_date, '%Y%m%d')
+        log_.info(f"now_dt: {now_dt}")
+        if dt is not None:
+            yesterday_dt = dt
+        log_.info(f"update_dt: {yesterday_dt}")
+        now_min = datetime.datetime.now().minute
+        # 查看当前更新的数据是否已准备好
+        data_count = data_check(project=project, table=table, dt=yesterday_dt)
+        if data_count > 0:
+            log_.info('update_offline_score_user start! {}'.format(data_count))
+            # 数据准备好,进行更新
+            update_offline_score_user(dt=yesterday_dt)
+            log_.info('update_offline_score_user end!')
+        else:
+            # 数据没准备好,5分钟后重新检查
+            wait_seconds = 5 * 60
+            log_.info('data not ready, wait {}s'.format(wait_seconds))
+            Timer(wait_seconds, timer_check).start()
+
+    except Exception as e:
+        log_.error(f"用户广告跳出率预估离线user数据更新失败 exception: {e}, traceback: {traceback.format_exc()}")
+        send_msg_to_feishu(
+            webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+            key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+            msg_text=f"rov-offline{config_.ENV_TEXT} - 用户广告跳出率预估离线user数据更新失败\n"
+                     f"exception: {e}\n"
+                     f"traceback: {traceback.format_exc()}"
+        )
+
+
+if __name__ == "__main__":
+    dt = None
+    if len(sys.argv) > 1:
+        dt = sys.argv[1]
+        log_.info('## 手动更新:{}'.format(dt))
+    else:
+        log_.info('## 自动更新')
+    timer_check(dt)
+
+

+ 9 - 2
ad_out_v1_get_offline_score_user.sh

@@ -1,3 +1,10 @@
-
-/root/anaconda3/bin/python ad_out_v1_get_offline_score_user.py $*
+source /etc/profile
+echo $ROV_OFFLINE_ENV
+if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
+    cd /data2/rov-offline &&
+    /root/anaconda3/bin/python /data2/rov-offline/ad_out_v1_get_offline_score_user.py $*
+elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
+    cd /data/rov-offline &&
+    /root/anaconda3/bin/python /data/rov-offline/ad_out_v1_get_offline_score_user.py $*
+fi
 

+ 4 - 0
config.py

@@ -841,6 +841,10 @@ class BaseConfig(object):
 
     # 广告模型数据
     ad_model_data = {
+        'ad_out_v1': {
+            'project': 'loghubods',
+            'table': 'user_video_features_data_final'
+        },
         'user_group': {
             'project': 'loghubods',
             'table': 'user_share_return_admodel'