ad_feature_process.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. import pandas as pd
  2. from utils import get_data_from_odps
  # Names of the record fields used as feature columns by this module.
  # Every entry, including the last, ends with a comma: two adjacent string
  # literals with no comma between them are silently concatenated into one.
  features = [
      'apptype',
      'videoid',
      'mid',
      'ad_mid',
      'mid_preview_count',
      'mid_view_count',
      'mid_view_count_pv',
      'mid_play_count',
      'mid_play_count_pv',
      'mid_share_count',
      'mid_share_count_pv',
      'mid_return_count',
      'mid_share_rate',
      'mid_return_rate',
      'video_preview_count_uv',
      'video_preview_count_pv',
      'video_view_count_uv',
      'video_view_count_pv',
      'video_play_count_uv',
      'video_play_count_pv',
      'video_share_count_uv',
      'video_share_count_pv',
      'video_return_count',
      'video_ctr_uv',
      'video_ctr_pv',
      'video_share_rate_uv',
      'video_share_rate_pv',
      'video_return_rate',
      'share_videoid',
  ]
  def get_feature_data(project, table, features, dt) -> pd.DataFrame:
      """Fetch records from ODPS and collect the requested fields into a DataFrame.

      Args:
          project: Forwarded to ``get_data_from_odps`` as ``project``.
          table: Forwarded to ``get_data_from_odps`` as ``table``.
          features: Names of the fields to read from every record; they
              become the DataFrame columns, in this order.
          dt: Forwarded to ``get_data_from_odps`` as ``date``.

      Returns:
          A DataFrame with one row per fetched record and one column per
          feature name. When no records come back the frame is empty but
          still carries the feature columns, so later column access does
          not fail on a column-less frame.
      """
      # De-duplicate while keeping order (rows are dict-keyed, so a repeated
      # name only ever produced one column) and materialize once so the
      # names can be reused for every record.
      feature_names = list(dict.fromkeys(features))
      records = get_data_from_odps(date=dt, project=project, table=table)
      # Each record must support lookup by field name; a missing field
      # propagates whatever error the record type raises.
      rows = [
          {name: record[name] for name in feature_names}
          for record in records
      ]
      # Explicit columns keep the schema stable even when `rows` is empty.
      return pd.DataFrame(rows, columns=feature_names)
  def daily_data_process(project, table, features, dt) -> pd.DataFrame:
      """Fetch the feature data for ``dt`` through ``get_feature_data``.

      The processing step itself is not implemented yet; the fetched frame
      is returned unchanged so that callers can already use it.

      Args:
          project: Forwarded to ``get_feature_data``.
          table: Forwarded to ``get_feature_data``.
          features: Forwarded to ``get_feature_data``.
          dt: Forwarded to ``get_feature_data``.

      Returns:
          The DataFrame produced by ``get_feature_data``.
      """
      feature_df = get_feature_data(project=project, table=table, features=features, dt=dt)
      # TODO: implement the daily processing. The previous placeholder
      # `feature_df['']` looked up a column named '' and raised KeyError
      # for any frame without such a column, so it was removed.
      return feature_df