|  | @@ -0,0 +1,60 @@
 | 
											
												
													
														|  | 
 |  | +import traceback
 | 
											
												
													
														|  | 
 |  | +import datetime
 | 
											
												
													
														|  | 
 |  | +import pandas as pd
 | 
											
												
													
														|  | 
 |  | +from odps import ODPS
 | 
											
												
													
														|  | 
 |  | +from collections import defaultdict
 | 
											
												
													
														|  | 
 |  | +from config import set_config
 | 
											
												
													
														|  | 
 |  | +from log import Log
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +config_ = set_config()
 | 
											
												
													
														|  | 
 |  | +log_ = Log()
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +now_date = datetime.datetime.today()
 | 
											
												
													
														|  | 
 |  | +log_.info(f"now: {datetime.datetime.strftime(now_date, '%Y%m%d')}")
 | 
											
												
													
														|  | 
 |  | +dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
 | 
											
												
													
														|  | 
 |  | +project = 'loghubods'
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +odps = ODPS(
 | 
											
												
													
														|  | 
 |  | +        access_id=config_.ODPS_CONFIG['ACCESSID'],
 | 
											
												
													
														|  | 
 |  | +        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
 | 
											
												
													
														|  | 
 |  | +        project=project,
 | 
											
												
													
														|  | 
 |  | +        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
 | 
											
												
													
														|  | 
 |  | +    )
 | 
											
												
													
														|  | 
 |  | +sql = f"""
 | 
											
												
													
														|  | 
 |  | +SELECT  pqtid -- 串联每次广告从请求到点击整个链路唯一标识
 | 
											
												
													
														|  | 
 |  | +        ,apptype
 | 
											
												
													
														|  | 
 |  | +        ,machinecode -- mid
 | 
											
												
													
														|  | 
 |  | +        ,networktype -- 网络状态
 | 
											
												
													
														|  | 
 |  | +        ,brand -- 手机品牌
 | 
											
												
													
														|  | 
 |  | +        ,platform -- 操作系统
 | 
											
												
													
														|  | 
 |  | +        ,GET_JSON_OBJECT(machineinfo,'$.weChatVersion') AS weChatVersion -- 微信版本号
 | 
											
												
													
														|  | 
 |  | +        ,GET_JSON_OBJECT(machineinfo,'$.sdkVersion') AS sdkVersion -- 微信小程序基础库版本号
 | 
											
												
													
														|  | 
 |  | +        ,softversion -- 票圈版本号
 | 
											
												
													
														|  | 
 |  | +        ,ownadpositionid -- 自营广告位id
 | 
											
												
													
														|  | 
 |  | +        ,planid -- 广告计划id
 | 
											
												
													
														|  | 
 |  | +        ,ownaddetailid -- 广告id
 | 
											
												
													
														|  | 
 |  | +        ,clienttimestamp
 | 
											
												
													
														|  | 
 |  | +        ,clientip
 | 
											
												
													
														|  | 
 |  | +        ,ANALYSISIP(clientip,"region") AS province
 | 
											
												
													
														|  | 
 |  | +        ,ANALYSISIP(clientip,"city") AS city
 | 
											
												
													
														|  | 
 |  | +        ,headvideoid
 | 
											
												
													
														|  | 
 |  | +        ,businesstype
 | 
											
												
													
														|  | 
 |  | +FROM    loghubods.ad_action_log_own
 | 
											
												
													
														|  | 
 |  | +WHERE   dt = {dt}
 | 
											
												
													
														|  | 
 |  | +AND     ownadsystemtype = 'own'
 | 
											
												
													
														|  | 
 |  | +AND     (
 | 
											
												
													
														|  | 
 |  | +            businesstype = 'adView'
 | 
											
												
													
														|  | 
 |  | +            OR      businesstype = 'adClick'
 | 
											
												
													
														|  | 
 |  | +)
 | 
											
												
													
														|  | 
 |  | +;
 | 
											
												
													
														|  | 
 |  | +"""
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +with odps.execute_sql(sql=sql).open_reader() as reader:
 | 
											
												
													
														|  | 
 |  | +    d = defaultdict(list)  # collection默认一个dict
 | 
											
												
													
														|  | 
 |  | +    for record in reader:
 | 
											
												
													
														|  | 
 |  | +        for res in record:
 | 
											
												
													
														|  | 
 |  | +            # print(res)
 | 
											
												
													
														|  | 
 |  | +            d[res[0]].append(res[1])  # 解析record中的每一个元组,存储方式为(k,v),以k作为key,存储每一列的内容;
 | 
											
												
													
														|  | 
 |  | +    data = pd.DataFrame.from_dict(d, orient='index').T  # 转换为数据框,并转置,不转置的话是横条数据
 | 
											
												
													
														|  | 
 |  | +    print(data)
 | 
											
												
													
														|  | 
 |  | +    data.to_csv(f"{dt}.csv", index=False)
 |