1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 |
- import pandas as pd
- from utils import get_data_from_odps
# Field names to extract for each record. Presumably passed as the `features`
# argument of get_feature_data -- confirm against callers.
# Every entry must be followed by a comma: adjacent string literals are
# silently concatenated by Python, which would fuse two names into one.
features = [
    'apptype',
    'videoid',
    'mid',
    'ad_mid',
    'mid_preview_count',
    'mid_view_count',
    'mid_view_count_pv',
    'mid_play_count',
    'mid_play_count_pv',
    'mid_share_count',
    'mid_share_count_pv',
    'mid_return_count',
    'mid_share_rate',
    'mid_return_rate',
    'video_preview_count_uv',
    'video_preview_count_pv',
    'video_view_count_uv',
    'video_view_count_pv',
    'video_play_count_uv',
    'video_play_count_pv',
    'video_share_count_uv',
    'video_share_count_pv',
    'video_return_count',
    'video_ctr_uv',
    'video_ctr_pv',
    'video_share_rate_uv',
    'video_share_rate_pv',
    'video_return_rate',
    'share_videoid',
]
def get_feature_data(project, table, features, dt):
    """Fetch feature data and return it as a DataFrame.

    Args:
        project: Forwarded to ``get_data_from_odps`` as ``project``.
        table: Forwarded to ``get_data_from_odps`` as ``table``.
        features: Field names to read from every returned record.
        dt: Forwarded to ``get_data_from_odps`` as ``date``.

    Returns:
        A ``pandas.DataFrame`` built from one dict per record, each dict
        holding the requested feature names mapped to the record's values.
    """
    rows = [
        {name: row[name] for name in features}
        for row in get_data_from_odps(date=dt, project=project, table=table)
    ]
    return pd.DataFrame(rows)
def daily_data_process(project, table, features, dt):
    """Load the feature data for ``dt``; the processing step is not written yet.

    Args:
        project: Forwarded to ``get_feature_data``.
        table: Forwarded to ``get_feature_data``.
        features: Feature names forwarded to ``get_feature_data``.
        dt: Date value forwarded to ``get_feature_data``.

    Returns:
        None. The loaded frame is currently not used or returned.
    """
    feature_df = get_feature_data(project=project, table=table, features=features, dt=dt)
    # TODO: implement the daily processing of `feature_df`. The previous
    # placeholder subscripted the frame with an empty column name
    # (feature_df['']), which raises KeyError, so it was removed.
|