|
@@ -3,6 +3,7 @@ Created on Mon Mar 18, 2024
|
|
|
@author: luojunhui
|
|
|
Read data from odps and save to json file in local files
|
|
|
"""
|
|
|
+
|
|
|
import os
|
|
|
import sys
|
|
|
import json
|
|
@@ -33,38 +34,38 @@ class VideoDataGenerator(object):
|
|
|
result = []
|
|
|
for line in hour_data:
|
|
|
obj = {
|
|
|
- "uid": line['uid'],
|
|
|
- "video_id": line['videoid'],
|
|
|
- "type": line['type'],
|
|
|
- "channel": line['channel'],
|
|
|
- "fst": line['flowpool_start_type'],
|
|
|
- "fsl": line['flowpool_start_level'],
|
|
|
- "fet": line['flowpool_end_type'],
|
|
|
- "fel": line['flowpool_end_level'],
|
|
|
- "f_view": line['flowpool_distribute_view_times'],
|
|
|
- "f_share": line['flowpool_share_times'],
|
|
|
- "f_return": line['flowpool_return_users'],
|
|
|
- "f3_view": line['flowpool_3days_distribute_view_times'],
|
|
|
- "f3_share": line['flowpool_3days_share_times'],
|
|
|
- "f3_return": line['flowpool_3days_return_users'],
|
|
|
- "ros_dms": line['ros_dms'],
|
|
|
- "rov_dms": line['rov_dms'],
|
|
|
- "ros_sls": line['ros_sls'],
|
|
|
- "rov_sls": line['rov_sls'],
|
|
|
- "fans": line['fans'],
|
|
|
- "view_count_user_30days": line['view_cnt_user_30days'],
|
|
|
- "share_count_user_30days": line['share_cnt_user_30days'],
|
|
|
- "return_count_user_30days": line['return_cnt_user_30days'],
|
|
|
- "rov_user": line['rov_user'],
|
|
|
- "str_user": line['str_user'], # share / view
|
|
|
- "out_user_id": line['out_user_id'],
|
|
|
- "mode": line['strategy'],
|
|
|
- "out_play_cnt": line['out_play_cnt'],
|
|
|
- "out_like_cnt": line['out_like_cnt'],
|
|
|
- "out_share_cnt": line['out_share_cnt'],
|
|
|
- "out_collection_cnt": line['out_collection_cnt'],
|
|
|
- "up_level_time_hour": line['up_level_time_hour'],
|
|
|
- "dt": line['dt']
|
|
|
+ "uid": line["uid"],
|
|
|
+ "video_id": line["videoid"],
|
|
|
+ "type": line["type"],
|
|
|
+ "channel": line["channel"],
|
|
|
+ "fst": line["flowpool_start_type"],
|
|
|
+ "fsl": line["flowpool_start_level"],
|
|
|
+ "fet": line["flowpool_end_type"],
|
|
|
+ "fel": line["flowpool_end_level"],
|
|
|
+ "f_view": line["flowpool_distribute_view_times"],
|
|
|
+ "f_share": line["flowpool_share_times"],
|
|
|
+ "f_return": line["flowpool_return_users"],
|
|
|
+ "f3_view": line["flowpool_3days_distribute_view_times"],
|
|
|
+ "f3_share": line["flowpool_3days_share_times"],
|
|
|
+ "f3_return": line["flowpool_3days_return_users"],
|
|
|
+ "ros_dms": line["ros_dms"],
|
|
|
+ "rov_dms": line["rov_dms"],
|
|
|
+ "ros_sls": line["ros_sls"],
|
|
|
+ "rov_sls": line["rov_sls"],
|
|
|
+ "fans": line["fans"],
|
|
|
+ "view_count_user_30days": line["view_cnt_user_30days"],
|
|
|
+ "share_count_user_30days": line["share_cnt_user_30days"],
|
|
|
+ "return_count_user_30days": line["return_cnt_user_30days"],
|
|
|
+ "rov_user": line["rov_user"],
|
|
|
+ "str_user": line["str_user"], # share / view
|
|
|
+ "out_user_id": line["out_user_id"],
|
|
|
+ "mode": line["strategy"],
|
|
|
+ "out_play_cnt": line["out_play_cnt"],
|
|
|
+ "out_like_cnt": line["out_like_cnt"],
|
|
|
+ "out_share_cnt": line["out_share_cnt"],
|
|
|
+ "out_collection_cnt": line["out_collection_cnt"],
|
|
|
+ "up_level_time_hour": line["up_level_time_hour"],
|
|
|
+ "dt": line["dt"],
|
|
|
}
|
|
|
result.append(obj)
|
|
|
return result
|
|
@@ -79,17 +80,17 @@ class VideoDataGenerator(object):
|
|
|
data = self.oo.select(sql)
|
|
|
result = [
|
|
|
{
|
|
|
- "video_id": item['videoid'],
|
|
|
- "total_view": item['flowpool_distribute_view_times'],
|
|
|
- "total_share": item['flowpool_share_times'],
|
|
|
- "total_return": item['flowpool_return_users'],
|
|
|
- "3day_view": item['flowpool_3days_distribute_view_times'],
|
|
|
- "3day_share": item['flowpool_3days_share_times'],
|
|
|
- "3day_return": item['flowpool_3days_return_users'],
|
|
|
- "3day_up_level": item['up_level_3_days'],
|
|
|
- "dt": item['dt']
|
|
|
-
|
|
|
- } for item in data
|
|
|
+ "video_id": item["videoid"],
|
|
|
+ "total_view": item["flowpool_distribute_view_times"],
|
|
|
+ "total_share": item["flowpool_share_times"],
|
|
|
+ "total_return": item["flowpool_return_users"],
|
|
|
+ "3day_view": item["flowpool_3days_distribute_view_times"],
|
|
|
+ "3day_share": item["flowpool_3days_share_times"],
|
|
|
+ "3day_return": item["flowpool_3days_return_users"],
|
|
|
+ "3day_up_level": item["up_level_3_days"],
|
|
|
+ "dt": item["dt"],
|
|
|
+ }
|
|
|
+ for item in data
|
|
|
]
|
|
|
return result
|
|
|
|
|
@@ -109,7 +110,7 @@ def save_daily_data(start_date, end_date, save_path):
|
|
|
L[date_str] = {}
|
|
|
data_list = V.get_daily_data(date_str)
|
|
|
for obj in tqdm(data_list):
|
|
|
- video_id = obj['video_id']
|
|
|
+ video_id = obj["video_id"]
|
|
|
L[date_str][video_id] = obj
|
|
|
with open(save_path, "w") as f:
|
|
|
f.write(json.dumps(L, ensure_ascii=False))
|
|
@@ -151,7 +152,7 @@ def save_hourly_data(start_date, end_date, save_path):
|
|
|
# f.write(json.dumps(L, ensure_ascii=False))
|
|
|
|
|
|
|
|
|
-if __name__ == '__main__':
|
|
|
+if __name__ == "__main__":
|
|
|
flag = int(input("请输入标识符,输入 1 获取小时级数据, 输入 2 获取天级数据: \n"))
|
|
|
if flag == 1:
|
|
|
start = str(input("请输入开始字符串, 格式为 yyyymmddhh:\n"))
|
|
@@ -171,4 +172,3 @@ if __name__ == '__main__':
|
|
|
print("Time format is not ok")
|
|
|
else:
|
|
|
print("Input Error ! Make sure your input is 1 or 2!!")
|
|
|
-
|