ad_users_data_update.py 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. import datetime
  2. import pandas as pd
  3. from utils import get_data_from_odps
  4. from config import set_config
  5. from log import Log
# Module-level setup, executed once at import time.
# set_config() returns a pair; only its first element is kept (as config_),
# the second is discarded. The functions below read config_.ad_model_data.
config_, _ = set_config()
# Shared Log instance for this module (not referenced by the functions below).
log_ = Log()
  8. def get_feature_data(project, table, features, now_date):
  9. """获取特征数据"""
  10. dt = datetime.datetime.strftime(now_date, '%Y%m%d')
  11. records = get_data_from_odps(date=dt, project=project, table=table)
  12. feature_data = []
  13. for record in records:
  14. item = {}
  15. for feature_name in features:
  16. item[feature_name] = record[feature_name]
  17. feature_data.append(item)
  18. feature_df = pd.DataFrame(feature_data)
  19. return feature_df
  20. def predict_user_group_share_rate(now_date):
  21. """预估用户组对应的有广告时分享率"""
  22. # 获取用户组特征
  23. project = config_.ad_model_data['users_share_rate'].get('project')
  24. table = config_.ad_model_data['users_share_rate'].get('table')
  25. features = [
  26. 'apptype',
  27. 'group',
  28. 'sharerate_all',
  29. 'sharerate_ad'
  30. ]
  31. user_group_df = get_feature_data(project=project, table=table, features=features, now_date=now_date)
  32. user_group_df['sharerate_all'] = user_group_df['sharerate_all'].astype(float)
  33. user_group_df['sharerate_ad'] = user_group_df['sharerate_ad'].astype(float)
  34. # 获取有广告时所有用户组近30天的分享率
  35. ad_all_group_share_rate = user_group_df[user_group_df['group'] == 'allmids']['sharerate_ad']
  36. user_group_df = user_group_df[user_group_df['group'] != 'allmids']
  37. # 计算用户组有广告时分享率
  38. user_group_df['group_ad_share_rate'] = \
  39. user_group_df['sharerate_ad'] * float(ad_all_group_share_rate) / user_group_df['sharerate_all']
  40. return user_group_df
  41. def predict_video_share_rate(now_date):
  42. """预估视频有广告时分享率"""
  43. # 获取视频特征
  44. project = config_.ad_model_data['videos_share_rate'].get('project')
  45. table = config_.ad_model_data['videos_share_rate'].get('table')
  46. features = [
  47. 'apptype',
  48. 'videoid',
  49. 'sharerate_all',
  50. 'sharerate_ad'
  51. ]
  52. video_df = get_feature_data(project=project, table=table, features=features, now_date=now_date)
  53. video_df['sharerate_all'] = video_df['sharerate_all'].astype(float)
  54. video_df['sharerate_ad'] = video_df['sharerate_ad'].astype(float)
  55. # 获取有广告时所有视频近30天的分享率
  56. ad_all_videos_share_rate = video_df[video_df['videoid'] == 'allvideos']['sharerate_ad']
  57. video_df = video_df[video_df['videoid'] != 'allvideos']
  58. # 计算视频有广告时分享率
  59. video_df['video_ad_share_rate'] = \
  60. video_df['sharerate_ad'] * float(ad_all_videos_share_rate) / video_df['sharerate_all']
  61. return video_df