|
@@ -0,0 +1,53 @@
|
|
|
+import pandas as pd
|
|
|
+from utils import get_data_from_odps
|
|
|
+
|
|
|
# Names of the record fields to extract as feature columns.
# NOTE(review): presumably passed as the `features` argument of the functions
# in this module -- confirm against the callers.
# Every entry ends with a comma: adjacent string literals without one are
# silently concatenated into a single (wrong) name.
features = [
    'apptype',
    'videoid',
    'mid',
    'ad_mid',
    'mid_preview_count',
    'mid_view_count',
    'mid_view_count_pv',
    'mid_play_count',
    'mid_play_count_pv',
    'mid_share_count',
    'mid_share_count_pv',
    'mid_return_count',
    'mid_share_rate',
    'mid_return_rate',
    'video_preview_count_uv',
    'video_preview_count_pv',
    'video_view_count_uv',
    'video_view_count_pv',
    'video_play_count_uv',
    'video_play_count_pv',
    'video_share_count_uv',
    'video_share_count_pv',
    'video_return_count',
    'video_ctr_uv',
    'video_ctr_pv',
    'video_share_rate_uv',
    'video_share_rate_pv',
    'video_return_rate',
    'share_videoid',
]
|
|
|
+
|
|
|
+
|
|
|
def get_feature_data(project, table, features, dt):
    """Fetch feature data and return it as a DataFrame.

    Args:
        project: Forwarded to ``get_data_from_odps`` as ``project``.
        table: Forwarded to ``get_data_from_odps`` as ``table``.
        features: Names of the fields to copy out of every record; each
            name becomes one column of the result.
        dt: Forwarded to ``get_data_from_odps`` as its ``date`` argument.

    Returns:
        A ``pandas.DataFrame`` with one row per fetched record and one
        column per name in ``features``. The columns are present even when
        no records are returned, so callers can index them safely.
    """
    records = get_data_from_odps(date=dt, project=project, table=table)
    # De-duplicate while keeping order: the per-record dict would collapse
    # repeated names anyway, and this keeps the explicit column list in sync.
    columns = list(dict.fromkeys(features))
    rows = [{name: record[name] for name in columns} for record in records]
    # Passing `columns` explicitly keeps the schema stable for empty input.
    return pd.DataFrame(rows, columns=columns)
|
|
|
+
|
|
|
+
|
|
|
def daily_data_process(project, table, features, dt):
    """Fetch the feature data for ``dt``; further processing is unfinished.

    Args:
        project: Forwarded to ``get_feature_data``.
        table: Forwarded to ``get_feature_data``.
        features: Feature names forwarded to ``get_feature_data``.
        dt: Forwarded to ``get_feature_data``.

    Returns:
        None. The fetched frame is not yet processed or returned.
    """
    feature_df = get_feature_data(project=project, table=table, features=features, dt=dt)
    # TODO: implement the actual processing of `feature_df`.
    # The former placeholder lookup `feature_df['']` was dropped: no requested
    # column is named '' so it raised KeyError on every call.
|