|
@@ -75,7 +75,7 @@ def user_data_process(project, table, dt, app_type):
|
|
|
'video_return_count_30day',
|
|
|
]
|
|
|
for column_name in type_int_columns:
|
|
|
- feature_df[column_name].astype(int)
|
|
|
+ feature_df[column_name] = feature_df[column_name].astype(int)
|
|
|
type_float_columns = [
|
|
|
'video_ctr_uv_30day',
|
|
|
'video_ctr_pv_30day',
|
|
@@ -84,7 +84,7 @@ def user_data_process(project, table, dt, app_type):
|
|
|
'video_return_rate_30day',
|
|
|
]
|
|
|
for column_name in type_float_columns:
|
|
|
- feature_df[column_name].astype(float)
|
|
|
+ feature_df[column_name] = feature_df[column_name].astype(float)
|
|
|
print(f"feature_df shape: {feature_df.shape}")
|
|
|
print('step 3: add new video feature')
|
|
|
# 补充新用户默认数据(使用均值)
|
|
@@ -102,15 +102,15 @@ def user_data_process(project, table, dt, app_type):
|
|
|
'video_return_count_30day': int(feature_df['video_return_count_30day'].mean()),
|
|
|
}
|
|
|
new_video_feature['video_ctr_uv_30day'] = float(
|
|
|
- new_video_feature['video_play_count_uv_30day'] / new_video_feature['video_view_count_uv_30day'])
|
|
|
+ new_video_feature['video_play_count_uv_30day'] / new_video_feature['video_view_count_uv_30day'] + 1)
|
|
|
new_video_feature['video_ctr_pv_30day'] = float(
|
|
|
- new_video_feature['video_play_count_pv_30day'] / new_video_feature['video_view_count_pv_30day'])
|
|
|
+ new_video_feature['video_play_count_pv_30day'] / new_video_feature['video_view_count_pv_30day'] + 1)
|
|
|
new_video_feature['video_share_rate_uv_30day'] = float(
|
|
|
- new_video_feature['video_share_count_uv_30day'] / new_video_feature['video_play_count_uv_30day'])
|
|
|
+ new_video_feature['video_share_count_uv_30day'] / new_video_feature['video_play_count_uv_30day'] + 1)
|
|
|
new_video_feature['video_share_rate_pv_30day'] = float(
|
|
|
- new_video_feature['video_share_count_pv_30day'] / new_video_feature['video_play_count_pv_30day'])
|
|
|
+ new_video_feature['video_share_count_pv_30day'] / new_video_feature['video_play_count_pv_30day'] + 1)
|
|
|
new_video_feature['video_return_rate_30day'] = float(
|
|
|
- new_video_feature['video_return_count_30day'] / new_video_feature['video_view_count_pv_30day'])
|
|
|
+ new_video_feature['video_return_count_30day'] / new_video_feature['video_view_count_pv_30day'] + 1)
|
|
|
new_video_feature_df = pd.DataFrame([new_video_feature])
|
|
|
video_df = pd.concat([feature_df, new_video_feature_df])
|
|
|
print(f"video_df shape: {video_df.shape}")
|