liqian 1 year ago
parent
commit
a9f6f2ffaf
2 changed files with 62 additions and 32 deletions
  1. 27 14
      ad_predict_user_data_process.py
  2. 35 18
      ad_predict_video_data_process.py

+ 27 - 14
ad_predict_user_data_process.py

@@ -41,7 +41,7 @@ def get_feature_data(project, table, dt, app_type):
         endpoint=odps_config['ENDPOINT'],
     )
     feature_data = []
-    sql = f"select * from {project}.{table} where dt={dt} and apptype={app_type} limit 10000"
+    sql = f"select * from {project}.{table} where dt={dt} and apptype={app_type}"
     with odps.execute_sql(sql).open_reader() as reader:
         for record in reader:
             # print(record)
@@ -115,18 +115,31 @@ def user_data_process(project, table, dt, app_type):
     for ind, row in user_df.iterrows():
         app_type = row['apptype']
         mid = row['mid']
-        value = {
-            'mid_preview_count_30day': row['mid_preview_count_30day'],
-            'mid_view_count_30day': row['mid_view_count_30day'],
-            'mid_view_count_pv_30day': row['mid_view_count_pv_30day'],
-            'mid_play_count_30day': row['mid_play_count_30day'],
-            'mid_play_count_pv_30day': row['mid_play_count_pv_30day'],
-            'mid_share_count_30day': row['mid_share_count_30day'],
-            'mid_share_count_pv_30day': row['mid_share_count_pv_30day'],
-            'mid_return_count_30day': row['mid_return_count_30day'],
-            'mid_share_rate_30day': row['mid_share_rate_30day'],
-            'mid_return_rate_30day': row['mid_return_rate_30day']
-        }
+        # value = {
+        #     'mid_preview_count_30day': row['mid_preview_count_30day'],
+        #     'mid_view_count_30day': row['mid_view_count_30day'],
+        #     'mid_view_count_pv_30day': row['mid_view_count_pv_30day'],
+        #     'mid_play_count_30day': row['mid_play_count_30day'],
+        #     'mid_play_count_pv_30day': row['mid_play_count_pv_30day'],
+        #     'mid_share_count_30day': row['mid_share_count_30day'],
+        #     'mid_share_count_pv_30day': row['mid_share_count_pv_30day'],
+        #     'mid_return_count_30day': row['mid_return_count_30day'],
+        #     'mid_share_rate_30day': row['mid_share_rate_30day'],
+        #     'mid_return_rate_30day': row['mid_return_rate_30day']
+        # }
+        value = [
+            row['mid_preview_count_30day'],
+            row['mid_view_count_30day'],
+            row['mid_view_count_pv_30day'],
+            row['mid_play_count_30day'],
+            row['mid_play_count_pv_30day'],
+            row['mid_share_count_30day'],
+            row['mid_share_count_pv_30day'],
+            row['mid_return_count_30day'],
+            row['mid_share_rate_30day'],
+            row['mid_return_rate_30day']
+        ]
+
         key = f"{xgb_config['predict_user_feature_key_prefix']}{app_type}:{mid}"
         redis_helper.set_data_to_redis(key_name=key, value=str(value), expire_time=48 * 3600)
 
@@ -152,4 +165,4 @@ def timer_check():
 if __name__ == '__main__':
     st_time = time.time()
     timer_check()
-    print(f"{time.time() - st_time}s")
+    print(f"execute time: {time.time() - st_time}s")

+ 35 - 18
ad_predict_video_data_process.py

@@ -45,7 +45,7 @@ def get_feature_data(project, table, dt, app_type):
         endpoint=odps_config['ENDPOINT'],
     )
     feature_data = []
-    sql = f"select * from {project}.{table} where dt={dt} and apptype={app_type} limit 1000"
+    sql = f"select * from {project}.{table} where dt={dt} and apptype={app_type}"
     with odps.execute_sql(sql).open_reader() as reader:
         for record in reader:
             # print(record)
@@ -130,22 +130,39 @@ def video_data_process(project, table, dt, app_type):
     for ind, row in video_df.iterrows():
         app_type = row['apptype']
         video_id = row['videoid']
-        value = {
-            'video_preview_count_uv_30day': row['video_preview_count_uv_30day'],
-            'video_preview_count_pv_30day': row['video_preview_count_pv_30day'],
-            'video_view_count_uv_30day': row['video_view_count_uv_30day'],
-            'video_view_count_pv_30day': row['video_view_count_pv_30day'],
-            'video_play_count_uv_30day': row['video_play_count_uv_30day'],
-            'video_play_count_pv_30day': row['video_play_count_pv_30day'],
-            'video_share_count_uv_30day': row['video_share_count_uv_30day'],
-            'video_share_count_pv_30day': row['video_share_count_pv_30day'],
-            'video_return_count_30day': row['video_return_count_30day'],
-            'video_ctr_uv_30day': row['video_ctr_uv_30day'],
-            'video_ctr_pv_30day': row['video_ctr_pv_30day'],
-            'video_share_rate_uv_30day': row['video_share_rate_uv_30day'],
-            'video_share_rate_pv_30day': row['video_share_rate_pv_30day'],
-            'video_return_rate_30day': row['video_return_rate_30day']
-        }
+        # value = {
+        #     'video_preview_count_uv_30day': row['video_preview_count_uv_30day'],
+        #     'video_preview_count_pv_30day': row['video_preview_count_pv_30day'],
+        #     'video_view_count_uv_30day': row['video_view_count_uv_30day'],
+        #     'video_view_count_pv_30day': row['video_view_count_pv_30day'],
+        #     'video_play_count_uv_30day': row['video_play_count_uv_30day'],
+        #     'video_play_count_pv_30day': row['video_play_count_pv_30day'],
+        #     'video_share_count_uv_30day': row['video_share_count_uv_30day'],
+        #     'video_share_count_pv_30day': row['video_share_count_pv_30day'],
+        #     'video_return_count_30day': row['video_return_count_30day'],
+        #     'video_ctr_uv_30day': row['video_ctr_uv_30day'],
+        #     'video_ctr_pv_30day': row['video_ctr_pv_30day'],
+        #     'video_share_rate_uv_30day': row['video_share_rate_uv_30day'],
+        #     'video_share_rate_pv_30day': row['video_share_rate_pv_30day'],
+        #     'video_return_rate_30day': row['video_return_rate_30day']
+        # }
+        value = [
+            row['video_preview_count_uv_30day'],
+            row['video_preview_count_pv_30day'],
+            row['video_view_count_uv_30day'],
+            row['video_view_count_pv_30day'],
+            row['video_play_count_uv_30day'],
+            row['video_play_count_pv_30day'],
+            row['video_share_count_uv_30day'],
+            row['video_share_count_pv_30day'],
+            row['video_return_count_30day'],
+            row['video_ctr_uv_30day'],
+            row['video_ctr_pv_30day'],
+            row['video_share_rate_uv_30day'],
+            row['video_share_rate_pv_30day'],
+            row['video_return_rate_30day']
+        ]
+
         key = f"{xgb_config['predict_video_feature_key_prefix']}{app_type}:{video_id}"
         redis_helper.set_data_to_redis(key_name=key, value=str(value), expire_time=48*3600)
 
@@ -171,5 +188,5 @@ def timer_check():
 if __name__ == '__main__':
     st_time = time.time()
     timer_check()
-    print(f"{time.time() - st_time}s")
+    print(f"execute time: {time.time() - st_time}s")