liqian 1 年之前
父节点
当前提交
5aafabe02b
共有 3 个文件被更改,包括 13 次插入 和 13 次删除
  1. 2 2
      ad_feature_process.py
  2. 4 4
      ad_predict_user_data_process.py
  3. 7 7
      ad_predict_video_data_process.py

+ 2 - 2
ad_feature_process.py

@@ -134,7 +134,7 @@ def daily_data_process(project, table, features, dt, app_type):
         'video_return_count_30day',
     ]
     for column_name in type_int_columns:
-        feature_df[column_name].astype(int)
+        feature_df[column_name] = feature_df[column_name].astype(int)
     type_float_columns = [
         'mid_share_rate_30day',
         'mid_return_rate_30day',
@@ -145,7 +145,7 @@ def daily_data_process(project, table, features, dt, app_type):
         'video_return_rate_30day',
     ]
     for column_name in type_float_columns:
-        feature_df[column_name].astype(float)
+        feature_df[column_name] = feature_df[column_name].astype(float)
     print(f"feature_df shape: {feature_df.shape}")
     # 获取所需的字段
     print('step 3: get train_df')

+ 4 - 4
ad_predict_user_data_process.py

@@ -70,13 +70,13 @@ def user_data_process(project, table, dt, app_type):
         'mid_return_count_30day',
     ]
     for column_name in type_int_columns:
-        feature_df[column_name].astype(int)
+        feature_df[column_name] = feature_df[column_name].astype(int)
     type_float_columns = [
         'mid_share_rate_30day',
         'mid_return_rate_30day',
     ]
     for column_name in type_float_columns:
-        feature_df[column_name].astype(float)
+        feature_df[column_name] = feature_df[column_name].astype(float)
     print(f"feature_df shape: {feature_df.shape}")
     print('step 3: add new user feature')
     # 补充新用户默认数据(使用均值)
@@ -93,9 +93,9 @@ def user_data_process(project, table, dt, app_type):
         'mid_return_count_30day': int(feature_df['mid_return_count_30day'].mean()),
     }
     new_user_feature['mid_share_rate_30day'] = float(
-        new_user_feature['mid_share_count_pv_30day'] / new_user_feature['mid_play_count_pv_30day'])
+        new_user_feature['mid_share_count_pv_30day'] / new_user_feature['mid_play_count_pv_30day'] + 1)
     new_user_feature['mid_return_rate_30day'] = float(
-        new_user_feature['mid_return_count_30day'] / new_user_feature['mid_view_count_pv_30day'])
+        new_user_feature['mid_return_count_30day'] / new_user_feature['mid_view_count_pv_30day'] + 1)
     new_user_feature_df = pd.DataFrame([new_user_feature])
     user_df = pd.concat([feature_df, new_user_feature_df])
     print(f"user_df shape: {user_df.shape}")

+ 7 - 7
ad_predict_video_data_process.py

@@ -75,7 +75,7 @@ def user_data_process(project, table, dt, app_type):
         'video_return_count_30day',
     ]
     for column_name in type_int_columns:
-        feature_df[column_name].astype(int)
+        feature_df[column_name] = feature_df[column_name].astype(int)
     type_float_columns = [
         'video_ctr_uv_30day',
         'video_ctr_pv_30day',
@@ -84,7 +84,7 @@ def user_data_process(project, table, dt, app_type):
         'video_return_rate_30day',
     ]
     for column_name in type_float_columns:
-        feature_df[column_name].astype(float)
+        feature_df[column_name] = feature_df[column_name].astype(float)
     print(f"feature_df shape: {feature_df.shape}")
     print('step 3: add new video feature')
     # 补充新用户默认数据(使用均值)
@@ -102,15 +102,15 @@ def user_data_process(project, table, dt, app_type):
         'video_return_count_30day': int(feature_df['video_return_count_30day'].mean()),
     }
     new_video_feature['video_ctr_uv_30day'] = float(
-        new_video_feature['video_play_count_uv_30day'] / new_video_feature['video_view_count_uv_30day'])
+        new_video_feature['video_play_count_uv_30day'] / new_video_feature['video_view_count_uv_30day'] + 1)
     new_video_feature['video_ctr_pv_30day'] = float(
-        new_video_feature['video_play_count_pv_30day'] / new_video_feature['video_view_count_pv_30day'])
+        new_video_feature['video_play_count_pv_30day'] / new_video_feature['video_view_count_pv_30day'] + 1)
     new_video_feature['video_share_rate_uv_30day'] = float(
-        new_video_feature['video_share_count_uv_30day'] / new_video_feature['video_play_count_uv_30day'])
+        new_video_feature['video_share_count_uv_30day'] / new_video_feature['video_play_count_uv_30day'] + 1)
     new_video_feature['video_share_rate_pv_30day'] = float(
-        new_video_feature['video_share_count_pv_30day'] / new_video_feature['video_play_count_pv_30day'])
+        new_video_feature['video_share_count_pv_30day'] / new_video_feature['video_play_count_pv_30day'] + 1)
     new_video_feature['video_return_rate_30day'] = float(
-        new_video_feature['video_return_count_30day'] / new_video_feature['video_view_count_pv_30day'])
+        new_video_feature['video_return_count_30day'] / new_video_feature['video_view_count_pv_30day'] + 1)
     new_video_feature_df = pd.DataFrame([new_video_feature])
     video_df = pd.concat([feature_df, new_video_feature_df])
     print(f"video_df shape: {video_df.shape}")