1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 |
- import pandas as pd
- from datetime import datetime
- from utils import get_data_from_odps
- from config import set_config
- from log import Log
# set_config() returns a pair: the first element is the configuration object
# read below (e.g. config_.PROJECT_24H_APP_TYPE); the second is bound to
# ``env`` and is not used in the visible part of this module.
config_, env = set_config()

# Module-wide logger instance; functions here report through log_.info(...).
log_ = Log()
# Names of the fields copied from every source record into the feature frame.
# The order here is the column order of the resulting DataFrame.
features = [
    # Record identifiers.
    'apptype',
    'videoid',

    # Distinct-user counts over the past 24h.
    'preview人数',  # users with a pre-exposure (preview) in the past 24h
    'view人数',  # users with an exposure (view) in the past 24h
    'play人数',  # users who played in the past 24h
    'share人数',  # users who shared in the past 24h
    '回流人数',  # users who returned in the past 24h via shares made in the past 24h

    # Event counts over the past 24h.
    'preview次数',  # pre-exposure (preview) events in the past 24h
    'view次数',  # exposure (view) events in the past 24h
    'play次数',  # play events in the past 24h
    'share次数',  # share events in the past 24h

    # Platform-level fields (undocumented in the original source).
    'platform_return',
    'platform_preview',
    'platform_preview_total',
    'platform_show',
    'platform_show_total',
    'platform_view',
    'platform_view_total',
]
def get_feature_data(now_date, now_h, project, table):
    """Fetch feature data for one partition and return it as a DataFrame.

    Args:
        now_date: Object with a ``strftime`` method (a ``datetime`` in the
            visible caller); formatted as ``%Y%m%d%H`` to build the ``date``
            argument passed to ``get_data_from_odps``.
        now_h: Not used by this function; kept so existing callers keep
            working.
        project: Forwarded unchanged to ``get_data_from_odps``.
        table: Forwarded unchanged to ``get_data_from_odps``.

    Returns:
        pandas.DataFrame with one row per fetched record and exactly the
        columns listed in the module-level ``features``, in that order. When
        no records are returned the frame is empty but still has those
        columns.

    Raises:
        Whatever ``record[feature_name]`` raises when a record lacks one of
        the expected fields (not caught here).
    """
    dt = now_date.strftime('%Y%m%d%H')
    log_.info({'feature_dt': dt})
    records = get_data_from_odps(date=dt, project=project, table=table)

    # Copy only the fields named in ``features`` out of each record.
    feature_data = [
        {feature_name: record[feature_name] for feature_name in features}
        for record in records
    ]

    # Passing ``columns`` pins the schema: without it an empty result set
    # yields a frame with no columns at all, which breaks any later column
    # access by name.
    return pd.DataFrame(feature_data, columns=features)
def main():
    """Gather the run-time inputs for the 24h feature job.

    Takes a single snapshot of the local clock, logs it formatted as
    ``%Y%m%d%H``, and reads ``PROJECT_24H_APP_TYPE`` / ``TABLE_24H_APP_TYPE``
    from ``config_`` (presumably the project and table later handed to
    ``get_feature_data`` - confirm against the rest of the file).

    NOTE(review): within the visible part of this function the gathered
    values are not consumed yet; all local names are kept so any code that
    follows continues to work.
    """
    # Read the clock once. The original called datetime.today() and then
    # datetime.now() twice more, so the logged hour, now_min and now_h could
    # disagree if the calls straddled a minute/hour rollover.
    now_date = datetime.today()
    log_.info(f"now_date: {now_date.strftime('%Y%m%d%H')}")

    project_24h = config_.PROJECT_24H_APP_TYPE
    table_24h = config_.TABLE_24H_APP_TYPE

    now_min = now_date.minute
    now_h = now_date.hour
|