|
@@ -7,7 +7,7 @@ features = [
|
|
|
'mid',
|
|
|
'videoid',
|
|
|
'ad_mid',
|
|
|
- 'share_videoid'
|
|
|
+ 'share_videoid',
|
|
|
'mid_preview_count_30day',
|
|
|
'mid_view_count_30day',
|
|
|
'mid_view_count_pv_30day',
|
|
@@ -60,7 +60,7 @@ train_feature = [
|
|
|
'video_share_rate_pv_30day',
|
|
|
'video_return_rate_30day',
|
|
|
'ad_status',
|
|
|
- 'share_status'
|
|
|
+ 'share_status',
|
|
|
]
|
|
|
|
|
|
|
|
@@ -79,7 +79,10 @@ def get_feature_data(project, table, features, dt):
|
|
|
|
|
|
def daily_data_process(project, table, features, dt, app_type):
|
|
|
"""每日特征处理"""
|
|
|
+ print('step 1: get feature data')
|
|
|
feature_initial_df = get_feature_data(project=project, table=table, features=features, dt=dt)
|
|
|
+ print(f"feature_initial_df shape: {feature_initial_df.shape}")
|
|
|
+ print('step 2: process')
|
|
|
feature_initial_df['apptype'] = feature_initial_df['apptype'].astype(int)
|
|
|
feature_df = feature_initial_df[feature_initial_df['apptype'] == app_type]
|
|
|
# 增加此次是否有广告字段 'ad_status' 1: 有广告, 0: 无广告
|
|
@@ -124,11 +127,23 @@ def daily_data_process(project, table, features, dt, app_type):
|
|
|
]
|
|
|
# Cast the rate/count columns listed in type_float_columns to float.
# astype() returns a new object rather than converting in place, so the
# result has to be bound back to feature_df; the previous per-column loop
# called astype() and threw the result away, leaving the dtypes unchanged.
# Rebinding the whole frame (instead of assigning column by column) also
# avoids writing into what may be a filtered slice of another DataFrame.
feature_df = feature_df.astype({column_name: float for column_name in type_float_columns})
|
|
|
+ print(f"feature_df shape: {feature_df.shape}")
|
|
|
# 获取所需的字段
|
|
|
+ print('step 3: get train_df')
|
|
|
train_df = feature_df[train_feature]
|
|
|
+ print(f"train_df shape: {train_df.shape}")
|
|
|
return train_df
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual entry point: build the training frame for one partition date
    # and dump it to ./data/<dt>.csv for inspection.
    import os

    project = 'loghubods'
    table = 'admodel_data_train'
    dt = '20230725'

    # Create the output directory before the data pull: DataFrame.to_csv
    # does not create missing parent directories, and failing only after
    # the processing has finished would waste the whole run.
    os.makedirs('./data', exist_ok=True)

    df = daily_data_process(project=project, table=table, features=features, dt=dt, app_type=0)
    print(df.shape)
    print(df.columns)
    df.to_csv(f'./data/{dt}.csv')
|
|
|
+
|
|
|
|
|
|
|
|
|
|