ad_action_log_get.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. import traceback
  2. import datetime
  3. import pandas as pd
  4. from odps import ODPS
  5. from collections import defaultdict
  6. from config import set_config
  7. from log import Log
  8. config_ = set_config()
  9. log_ = Log()
  10. now_date = datetime.datetime.today()
  11. log_.info(f"now: {datetime.datetime.strftime(now_date, '%Y%m%d')}")
  12. dt = datetime.datetime.strftime(now_date - datetime.timedelta(days=1), '%Y%m%d')
  13. project = 'loghubods'
  14. odps = ODPS(
  15. access_id=config_.ODPS_CONFIG['ACCESSID'],
  16. secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
  17. project=project,
  18. endpoint=config_.ODPS_CONFIG['ENDPOINT'],
  19. )
  20. sql = f"""
  21. SELECT pqtid -- 串联每次广告从请求到点击整个链路唯一标识
  22. ,apptype
  23. ,machinecode -- mid
  24. ,networktype -- 网络状态
  25. ,brand -- 手机品牌
  26. ,platform -- 操作系统
  27. ,GET_JSON_OBJECT(machineinfo,'$.weChatVersion') AS weChatVersion -- 微信版本号
  28. ,GET_JSON_OBJECT(machineinfo,'$.sdkVersion') AS sdkVersion -- 微信小程序基础库版本号
  29. ,softversion -- 票圈版本号
  30. ,ownadpositionid -- 自营广告位id
  31. ,planid -- 广告计划id
  32. ,ownaddetailid -- 广告id
  33. ,clienttimestamp
  34. ,clientip
  35. ,ANALYSISIP(clientip,"region") AS province
  36. ,ANALYSISIP(clientip,"city") AS city
  37. ,headvideoid
  38. ,businesstype
  39. FROM loghubods.ad_action_log_own
  40. WHERE dt = {dt}
  41. AND ownadsystemtype = 'own'
  42. AND (
  43. businesstype = 'adView'
  44. OR businesstype = 'adClick'
  45. )
  46. ;
  47. """
  48. with odps.execute_sql(sql=sql).open_reader() as reader:
  49. d = defaultdict(list) # collection默认一个dict
  50. for record in reader:
  51. for res in record:
  52. # print(res)
  53. d[res[0]].append(res[1]) # 解析record中的每一个元组,存储方式为(k,v),以k作为key,存储每一列的内容;
  54. data = pd.DataFrame.from_dict(d, orient='index').T # 转换为数据框,并转置,不转置的话是横条数据
  55. print(data)
  56. data.to_csv(f"{dt}.csv", index=False)