data_query_tools.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. from odps import ODPS
  2. from odps.errors import ODPSError
  3. from datetime import date, timedelta
  4. from agent import tool
  5. def get_odps_data(sql):
  6. # 配置信息
  7. access_id = 'LTAI9EBa0bd5PrDa'
  8. access_key = 'vAalxds7YxhfOA2yVv8GziCg3Y87v5'
  9. project = 'loghubods'
  10. endpoint = 'http://service.odps.aliyun.com/api'
  11. # 1. 初始化 ODPS 入口
  12. o = ODPS(access_id, access_key, project, endpoint=endpoint)
  13. try:
  14. # 2. 执行 SQL 并获取结果
  15. # execute_sql 会等待任务完成,使用 open_reader 读取数据
  16. with o.execute_sql(sql).open_reader() as reader:
  17. # reader 类似于 Java 中的 List<Record>
  18. # 我们可以直接将其转换为 Python 的 list
  19. records = [record for record in reader]
  20. return records
  21. except ODPSError as e:
  22. print(f"ODPS 错误: {e}")
  23. return None
  24. def get_rov_by_merge_leve2_and_video_ids(merge_leve2, video_ids):
  25. merge_level_in_clause = f"'{merge_leve2}'"
  26. video_ids_in_clause = ", ".join([f"'{video_id}'" for video_id in video_ids])
  27. end_date = (date.today() - timedelta(days=1)).strftime("%Y%m%d")
  28. start_date = (date.today() - timedelta(days=14)).strftime("%Y%m%d")
  29. sql_query = f'''
  30. SELECT
  31. v.videoid,
  32. CASE
  33. WHEN COALESCE(SUM(COALESCE(t3.`当日分发曝光pv`, 0)), 0) < 1000 THEN 0
  34. ELSE COALESCE(AVG(NULLIF(t3.rov_t0, 0)), 0)
  35. END AS avg_rov_t0
  36. FROM
  37. (
  38. SELECT
  39. t2.videoid,
  40. t2.merge_leve2
  41. FROM videoods.content_profile t1
  42. JOIN loghubods.video_merge_tag t2
  43. ON t1.content_id = t2.videoid
  44. WHERE
  45. t1.status = 3
  46. AND t1.is_deleted = 0
  47. AND t2.merge_leve2 IN ({merge_level_in_clause})
  48. ) v
  49. LEFT JOIN loghubods.video_dimension_detail_add_column t3
  50. ON v.videoid = t3.视频id
  51. AND t3.dt >= '{start_date}'
  52. AND t3.dt <= '{end_date}'
  53. WHERE v.videoid in ({video_ids_in_clause})
  54. GROUP BY
  55. v.videoid
  56. ;
  57. '''
  58. data = get_odps_data(sql_query)
  59. result_dict = {}
  60. if data:
  61. result_dict = {r[0]: r[1] for r in data}
  62. return result_dict
  63. def get_changwen_weight(account_name):
  64. bizdatemax_date = date.today() - timedelta(days=1)
  65. bizdatemin_date = bizdatemax_date - timedelta(days=30)
  66. bizdatemax = bizdatemax_date.strftime("%Y%m%d")
  67. bizdatemin = bizdatemin_date.strftime("%Y%m%d")
  68. sql_query = f'''
  69. SELECT 公众号名
  70. ,videoid
  71. ,sum(头部曝光) as 头部曝光
  72. ,sum(头部realplay) as 头部realplay
  73. ,sum(头部分享) as 头部分享
  74. ,sum(头部回流人数) AS 头部回流数
  75. ,sum(推荐曝光数) as 推荐曝光数
  76. ,sum(推荐realplay) as 推荐realplay
  77. ,sum(推荐分享数) as 推荐分享数
  78. ,sum(推荐回流数) as 推荐回流数
  79. ,sum(当日回流进入分发曝光次数) AS vov分子
  80. FROM (
  81. SELECT DISTINCT a.公众号名
  82. ,a.videoid
  83. ,e.merge_leve1 AS 一级品类
  84. ,e.merge_leve2 AS 二级品类
  85. ,a.title
  86. ,a.进入分发人数
  87. ,头部曝光pv AS 头部曝光
  88. ,头部realplay_pv AS 头部realplay
  89. ,头部分享pv AS 头部分享
  90. ,a.当日分发曝光pv AS 推荐曝光数
  91. ,a.当日分发播放pv
  92. ,分发realplay_pv AS 推荐realplay
  93. ,分发realplay_pv / a.当日分发播放pv AS 真实播放率pv
  94. ,当日分发播放uv
  95. ,c.realplay_uv AS 分发真实播uv
  96. ,c.realplay_uv / a.当日分发播放uv AS 真实播放率uv
  97. ,a.当日分发分享pv AS 推荐分享数
  98. ,a.当日分发分享pv / a.当日分发曝光pv AS str
  99. ,NVL(b.当日分发回流人数,0) AS 推荐回流数
  100. ,NVL(b.当日回流进入分发人数,0) AS 当日回流进入分发人数
  101. ,NVL(b.当日回流进入分发曝光次数,0) AS 当日回流进入分发曝光次数
  102. ,NVL(b.当日回流进入分发曝光次数,0) / a.当日分发曝光pv AS vov分子
  103. ,d.头部回流人数
  104. FROM (
  105. SELECT account_name AS 公众号名
  106. ,videoid
  107. ,title
  108. ,COUNT(DISTINCT mid) AS 进入分发人数
  109. ,COUNT(
  110. CASE WHEN pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND businesstype = 'videoView' THEN mid END
  111. ) AS 当日分发曝光pv
  112. ,COUNT(
  113. CASE WHEN pagesource REGEXP 'pages/user-videos-share$' AND businesstype = 'videoView' THEN mid END
  114. ) AS 头部曝光pv
  115. ,COUNT(
  116. CASE WHEN pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND businesstype = 'videoPlay' THEN mid END
  117. ) AS 当日分发播放pv
  118. ,COUNT(DISTINCT
  119. CASE WHEN pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND businesstype = 'videoPlay' THEN mid END
  120. ) AS 当日分发播放uv
  121. ,COUNT(
  122. CASE WHEN pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND businesstype = 'videoShareFriend' THEN mid END
  123. ) AS 当日分发分享pv
  124. ,COUNT(
  125. CASE WHEN pagesource REGEXP 'pages/user-videos-share$' AND businesstype = 'videoShareFriend' THEN mid END
  126. ) AS 头部分享pv
  127. FROM (
  128. SELECT DISTINCT a.mid
  129. ,a.videoid
  130. ,a.businesstype
  131. ,a.pagesource
  132. ,a.subsessionid
  133. ,account_name
  134. ,e.title
  135. FROM loghubods.video_action_log_rp a
  136. LEFT JOIN loghubods.user_wechat_identity_info_ha b
  137. ON a.mid = CONCAT('weixin_openid_',b.open_id)
  138. AND b.dt = MAX_PT("loghubods.user_wechat_identity_info_ha")
  139. LEFT JOIN loghubods.gzh_fans_info d
  140. ON b.union_id = d.union_id
  141. AND d.dt = MAX_PT("loghubods.gzh_fans_info")
  142. LEFT JOIN videoods.wx_video e
  143. ON a.videoid = e.id
  144. WHERE a.dt >= '{bizdatemin}'
  145. AND a.dt <= '{bizdatemax}'
  146. AND businesstype IN ('videoView','videoPlay','videoShareFriend')
  147. AND d.user_create_time IS NOT NULL
  148. AND account_name = '{account_name}'
  149. ) t
  150. GROUP BY 公众号名
  151. ,videoid
  152. ,title
  153. ) a
  154. LEFT JOIN (
  155. SELECT t.account_name AS 公众号名
  156. ,t.videoid
  157. ,COUNT(DISTINCT s.machinecode) AS 当日分发回流人数
  158. ,COUNT(DISTINCT v.mid) AS 当日回流进入分发人数
  159. ,COUNT(v.mid) AS 当日回流进入分发曝光次数
  160. FROM (
  161. SELECT DISTINCT a.subsessionid
  162. ,a.videoid
  163. ,a.mid
  164. ,d.account_name
  165. ,GET_JSON_OBJECT(extparams,'$.recomTraceId') AS recomtraceid
  166. FROM loghubods.video_action_log_rp a
  167. LEFT JOIN loghubods.user_wechat_identity_info_ha b
  168. ON a.mid = CONCAT('weixin_openid_',b.open_id)
  169. AND b.dt = MAX_PT("loghubods.user_wechat_identity_info_ha")
  170. LEFT JOIN loghubods.gzh_fans_info d
  171. ON b.union_id = d.union_id
  172. AND d.dt = MAX_PT("loghubods.gzh_fans_info")
  173. WHERE a.dt >= '{bizdatemin}'
  174. AND a.dt <= '{bizdatemax}'
  175. AND a.businesstype = 'videoShareFriend'
  176. AND a.pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
  177. AND d.user_create_time IS NOT NULL
  178. AND d.account_name = '{account_name}'
  179. ) t
  180. LEFT JOIN (
  181. SELECT DISTINCT subsessionid
  182. ,machinecode
  183. ,recomtraceid
  184. ,clickobjectid
  185. FROM loghubods.user_share_log
  186. WHERE dt >= '{bizdatemin}'
  187. AND dt <= '{bizdatemax}'
  188. AND topic = 'click'
  189. ) s
  190. ON t.recomtraceid = s.recomtraceid
  191. AND t.videoid = s.clickobjectid
  192. LEFT JOIN (
  193. SELECT subsessionid
  194. ,mid
  195. ,videoid
  196. FROM loghubods.video_action_log_rp
  197. WHERE dt >= '{bizdatemin}'
  198. AND dt <= '{bizdatemax}'
  199. AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
  200. AND businesstype = 'videoView'
  201. ) v
  202. ON s.subsessionid = v.subsessionid
  203. AND s.machinecode = v.mid
  204. GROUP BY account_name
  205. ,t.videoid
  206. ) b
  207. ON a.公众号名 = b.公众号名
  208. AND a.videoid = b.videoid
  209. LEFT JOIN (
  210. SELECT d.account_name AS 公众号名
  211. ,a.videoid
  212. ,COUNT(DISTINCT a.mid) AS realplay_uv
  213. ,COUNT(
  214. CASE WHEN a.pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' THEN a.mid END
  215. ) AS 分发realplay_pv
  216. ,COUNT(CASE WHEN a.pagesource REGEXP 'pages/user-videos-share$' THEN a.mid END) AS 头部realplay_pv
  217. FROM loghubods.ods_video_play_log_day a
  218. LEFT JOIN (
  219. SELECT DISTINCT open_id
  220. ,union_id
  221. FROM loghubods.user_wechat_identity_info_ha
  222. WHERE dt = MAX_PT("loghubods.user_wechat_identity_info_ha")
  223. ) b
  224. ON a.mid = CONCAT('weixin_openid_',b.open_id)
  225. LEFT JOIN loghubods.gzh_fans_info d
  226. ON b.union_id = d.union_id
  227. AND d.dt = MAX_PT("loghubods.gzh_fans_info")
  228. WHERE a.dt >= '{bizdatemin}'
  229. AND a.dt <= '{bizdatemax}'
  230. AND a.businesstype = 'videoRealPlay'
  231. AND d.user_create_time IS NOT NULL
  232. AND d.account_name = '{account_name}'
  233. GROUP BY d.account_name
  234. ,a.videoid
  235. ORDER BY 分发realplay_pv DESC
  236. ) c
  237. ON a.公众号名 = c.公众号名
  238. AND a.videoid = c.videoid
  239. LEFT JOIN (
  240. SELECT t.account_name AS 公众号名
  241. ,t.videoid
  242. ,COUNT(DISTINCT s.machinecode) AS 头部回流人数
  243. FROM (
  244. SELECT DISTINCT a.shareobjectid AS videoid
  245. ,a.shareid
  246. ,a.machinecode
  247. ,d.account_name
  248. FROM loghubods.user_share_log a
  249. LEFT JOIN loghubods.user_wechat_identity_info_ha b
  250. ON a.machinecode = CONCAT('weixin_openid_',b.open_id)
  251. AND b.dt = MAX_PT("loghubods.user_wechat_identity_info_ha")
  252. LEFT JOIN loghubods.gzh_fans_info d
  253. ON b.union_id = d.union_id
  254. AND d.dt = MAX_PT("loghubods.gzh_fans_info")
  255. WHERE a.dt >= '{bizdatemin}'
  256. AND a.dt <= '{bizdatemax}'
  257. AND a.topic = 'share'
  258. AND a.pagesource REGEXP 'pages/user-videos-share$'
  259. AND d.user_create_time IS NOT NULL
  260. AND d.account_name = '{account_name}'
  261. ) t
  262. LEFT JOIN (
  263. SELECT DISTINCT shareid
  264. ,machinecode
  265. ,clickobjectid
  266. FROM loghubods.user_share_log
  267. WHERE dt >= '{bizdatemin}'
  268. AND dt <= '{bizdatemax}'
  269. AND topic = 'click'
  270. ) s
  271. ON t.shareid = s.shareid
  272. GROUP BY account_name
  273. ,t.videoid
  274. ) d
  275. ON a.公众号名 = d.公众号名
  276. AND a.videoid = d.videoid
  277. LEFT JOIN loghubods.video_merge_tag e
  278. ON a.videoid = e.videoid
  279. )
  280. GROUP BY 公众号名, videoid
  281. ORDER BY 推荐曝光数 DESC
  282. '''
  283. result_list = []
  284. data = get_odps_data(sql_query)
  285. if data:
  286. ext_keys = [
  287. "头部曝光",
  288. "头部realplay",
  289. "头部分享",
  290. "头部回流数",
  291. "推荐曝光数",
  292. "推荐realplay",
  293. "推荐分享数",
  294. "推荐回流数",
  295. "vov分子",
  296. ]
  297. for r in data:
  298. ext_data = {k: r[i + 2] for i, k in enumerate(ext_keys)}
  299. result_list.append(
  300. {
  301. "account_name": r[0],
  302. "videoid": r[1],
  303. "ext_data": ext_data,
  304. }
  305. )
  306. return result_list
  307. if __name__ == '__main__':
  308. result_list = get_changwen_weight('史趣探秘')
  309. print(result_list)