liqian 1 年之前
父节点
当前提交
224c2cd903
共有 1 个文件被更改，包括 53 次插入和 0 次删除
  1. 53 0
      ad_feature_process.py

+ 53 - 0
ad_feature_process.py

@@ -0,0 +1,53 @@
+import pandas as pd
+from utils import get_data_from_odps
+
+# Feature field names; presumably the list handed to the functions below as
+# their ``features`` argument (verify against the caller).
+# NOTE: every entry needs its own comma. Adjacent string literals are
+# silently concatenated by Python, which would merge two names into one.
+features = [
+    'apptype',
+    'videoid',
+    'mid',
+    'ad_mid',
+    'mid_preview_count',
+    'mid_view_count',
+    'mid_view_count_pv',
+    'mid_play_count',
+    'mid_play_count_pv',
+    'mid_share_count',
+    'mid_share_count_pv',
+    'mid_return_count',
+    'mid_share_rate',
+    'mid_return_rate',
+    'video_preview_count_uv',
+    'video_preview_count_pv',
+    'video_view_count_uv',
+    'video_view_count_pv',
+    'video_play_count_uv',
+    'video_play_count_pv',
+    'video_share_count_uv',
+    'video_share_count_pv',
+    'video_return_count',
+    'video_ctr_uv',
+    'video_ctr_pv',
+    'video_share_rate_uv',
+    'video_share_rate_pv',
+    'video_return_rate',
+    'share_videoid',
+]
+
+
+def get_feature_data(project, table, features, dt):
+    """Fetch records via ``get_data_from_odps`` and return selected fields as a DataFrame.
+
+    Args:
+        project: Forwarded to ``get_data_from_odps`` as ``project``.
+        table: Forwarded to ``get_data_from_odps`` as ``table``.
+        features: Names of the fields copied out of every record.
+        dt: Forwarded to ``get_data_from_odps`` as ``date``.
+
+    Returns:
+        A ``pandas.DataFrame`` built from one dict per record, each dict
+        keyed by the names in ``features``.
+    """
+    rows = [
+        {name: rec[name] for name in features}
+        for rec in get_data_from_odps(date=dt, project=project, table=table)
+    ]
+    return pd.DataFrame(rows)
+
+
+def daily_data_process(project, table, features, dt):
+    """Load the feature data for ``dt``; further processing is not implemented yet.
+
+    Args:
+        project: Forwarded to ``get_feature_data``.
+        table: Forwarded to ``get_feature_data``.
+        features: Forwarded to ``get_feature_data``.
+        dt: Forwarded to ``get_feature_data``.
+
+    Returns:
+        The DataFrame produced by ``get_feature_data``, currently unmodified.
+    """
+    feature_df = get_feature_data(project=project, table=table, features=features, dt=dt)
+    # TODO(review): the previous body evaluated ``feature_df['']``, an
+    # unfinished placeholder that raises KeyError unless a column is literally
+    # named ''. It was removed; add the intended transformations here.
+    return feature_df