loghubods.dwd_recsys_alg_exposure_base_20250108.sql 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773
  1. --@exclude_input=loghubods.video_action_log_flow_new
  2. --@exclude_input=loghubods.user_share_log_flow
  3. --*********************
  4. -- alg_recsys_rank_labelmatch_20250108
  5. --*********************
  6. --drop table loghubods.dwd_recsys_alg_exposure_base_20250108;
  7. CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_base_20250108
  8. (
  9. apptype STRING
  10. ,uid STRING
  11. ,mid STRING
  12. ,vid STRING
  13. ,sessionid STRING
  14. ,subsessionid STRING
  15. ,pagesource STRING
  16. ,page STRING
  17. ,recommendlogvo STRING COMMENT '推荐算法的返回结果日志存在这个字段中'
  18. ,abcode STRING COMMENT '推荐算法的ab分组:ab0'
  19. ,recommendpagetype STRING COMMENT '用于区分pagesource相同时某些场景的。三种回流头部;两种下滑-沉浸页下滑和feed下滑。 -pages/user-videos-share-recommend-detail 是沉浸页。'
  20. ,recomtraceid STRING COMMENT '在后端调取推荐服务之前生成。前端降级会空;后端也可能为空。'
  21. ,headvideoid STRING
  22. ,rootsourceid STRING COMMENT '区分touliu等流量,咨询产品。'
  23. ,hotsencetype STRING
  24. ,flowpool STRING COMMENT '非流量池,是空字符串。没有null值。'
  25. ,level STRING COMMENT '非流量池,是null。'
  26. ,clientip STRING
  27. ,machineinfo_brand STRING
  28. ,machineinfo_model STRING
  29. ,machineinfo_system STRING
  30. ,machineinfo_wechatversion STRING
  31. ,machineinfo_sdkversion STRING
  32. ,province STRING
  33. ,city STRING
  34. ,ts STRING
  35. ,is_share STRING
  36. ,share_cnt STRING
  37. ,is_return_1 STRING
  38. ,return_1_pv STRING
  39. ,return_1_uv STRING
  40. ,return_1_mids STRING
  41. ,is_return_n STRING
  42. ,return_n_pv STRING
  43. ,return_n_uv STRING
  44. ,return_n_mids STRING
  45. ,is_return_noself STRING
  46. ,return_1_uv_noself STRING
  47. ,return_1_mids_noself STRING
  48. ,is_return_n_noself STRING
  49. ,return_n_uv_noself STRING
  50. ,return_n_mids_noself STRING
  51. ,new_exposure_cnt STRING
  52. ,extend STRING
  53. )
  54. PARTITIONED BY
  55. (
  56. dt STRING COMMENT '日期:20240105'
  57. ,hh STRING COMMENT '小时:04'
  58. )
  59. STORED AS ALIORC
  60. TBLPROPERTIES ('comment' = '推荐算法-labelmatch表-20250108更新最新版')
  61. LIFECYCLE 3650
  62. ;
  63. SET hive.exec.dynamic.partition = true
  64. ;
  65. SET hive.exec.dynamic.partition.mode = nonstrict
  66. ;
  67. SET odps.stage.mapper.split.size = 1024
  68. ;
  69. INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_exposure_base_20250108 PARTITION (dt,hh)
  70. WITH t_return AS
  71. (
  72. SELECT *
  73. ,CONCAT(dthh,":",shareid,":",vid,":",dthh_id) AS id
  74. FROM (
  75. SELECT CONCAT(year,month,day,hour) AS dthh
  76. ,apptype
  77. ,machinecode AS mid
  78. ,clickobjectid AS vid
  79. ,sessionid
  80. ,subsessionid -- 注意这是回流对应的subsessionid,每次回流点击会重置,可以通过这个字段找到回流的曝光。
  81. ,shareid
  82. ,rootshareid
  83. ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
  84. ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,clickobjectid,sessionid,subsessionid,shareid,rootshareid ORDER BY clienttimestamp DESC ) AS rn
  85. ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),shareid,clickobjectid ORDER BY clienttimestamp ) AS dthh_id
  86. FROM loghubods.user_share_log_flow -- 回流行为,理应subsessionid只有一条,但有脏数据,去重。
  87. WHERE CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') --WHERE CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
  88. AND __topic__ = 'click'
  89. AND apptype IS NOT NULL
  90. AND apptype NOT IN ('12') -- 12的pagesoucre是h5-share和h5-detail 暂时过滤掉 不做处理
  91. AND machinecode IS NOT NULL
  92. AND clickobjectid IS NOT NULL
  93. AND pagesource REGEXP "-pages/user-videos-share$" -- 存在脏数据 vlog-gzh /mine/mine-info$ 结尾的,都过滤掉。
  94. )
  95. WHERE rn = 1
  96. )
  97. ,t_share_from_sharelog AS
  98. (
  99. SELECT *
  100. FROM (
  101. SELECT CONCAT(year,month,day,hour) AS dthh
  102. ,apptype
  103. ,machinecode AS mid
  104. ,shareobjectid AS vid
  105. ,sessionid
  106. ,subsessionid
  107. ,pagesource
  108. ,shareid
  109. ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
  110. ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,shareobjectid,sessionid,subsessionid,pagesource,shareid ORDER BY clienttimestamp DESC ) AS rn
  111. FROM loghubods.user_share_log_flow
  112. WHERE CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') --WHERE CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
  113. AND __topic__ = 'share'
  114. AND apptype IS NOT NULL
  115. AND apptype NOT IN ('12')
  116. AND machinecode IS NOT NULL
  117. AND shareobjectid IS NOT NULL
  118. )
  119. WHERE rn = 1
  120. )
  121. ,t_exposure AS
  122. (
  123. SELECT dthh_id
  124. ,dthh
  125. ,apptype
  126. ,uid
  127. ,mid
  128. ,vid
  129. ,sessionid
  130. ,subsessionid
  131. ,rootsessionid_new
  132. ,pagesource
  133. ,recommendlogvo
  134. ,abcode
  135. ,recommendpagetype
  136. ,recomtraceid
  137. ,headvideoid
  138. ,rootsourceid
  139. ,hotsencetype
  140. ,animationscenetype
  141. ,JSON_PARSE(IF(JSON_VALID(extparams),extparams,"{}")) AS extParams
  142. ,flowpool
  143. ,level
  144. ,clientip
  145. ,machineinfo_brand
  146. ,machineinfo_model
  147. ,machineinfo_system
  148. ,machineinfo_wechatversion
  149. ,machineinfo_sdkversion
  150. ,province
  151. ,city
  152. ,versioncode
  153. ,ts
  154. ,rn
  155. ,id
  156. ,dt
  157. ,hh
  158. FROM loghubods.dwd_recsys_alg_exposure_base_view_20250402
  159. WHERE CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
  160. )
  161. ,t_exposure_recommend AS
  162. (
  163. SELECT *
  164. FROM t_exposure
  165. WHERE pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
  166. )
  167. ,t_return_exposure_1 AS -- 曝光关联回流,用于计算viewh24
  168. (
  169. SELECT *
  170. FROM (
  171. SELECT t1.id AS exposure_id
  172. ,t1.mid AS mid
  173. ,t1.vid AS vid
  174. ,t1.subsessionid AS subsessionid
  175. ,t1.sessionid AS sessionid
  176. ,t1.headvideoid AS headvideoid
  177. ,t1.dthh
  178. ,t2.id AS return_id
  179. ,ROW_NUMBER() OVER (PARTITION BY t1.id ORDER BY t2.ts DESC ) AS rn
  180. FROM t_exposure_recommend t1
  181. LEFT JOIN t_return t2
  182. ON t1.mid = t2.mid
  183. AND t1.headvideoid = t2.vid
  184. AND t1.subsessionid = t2.subsessionid
  185. )
  186. WHERE rn = 1
  187. )
  188. ,t_return_exposure_2 AS -- 曝光关联回流,用于计算viewh24
  189. (
  190. SELECT *
  191. FROM (
  192. SELECT t1.exposure_id AS exposure_id
  193. ,t1.mid AS mid
  194. ,t1.vid AS vid
  195. ,t1.subsessionid AS subsessionid
  196. ,t1.sessionid AS sessionid
  197. ,t1.headvideoid AS headvideoid
  198. ,t1.dthh
  199. ,t2.id AS return_id
  200. ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn
  201. FROM (
  202. SELECT *
  203. FROM t_return_exposure_1
  204. WHERE return_id IS NULL
  205. ) t1
  206. LEFT JOIN t_return t2
  207. ON t1.mid = t2.mid
  208. AND t1.headvideoid = t2.vid
  209. AND t1.sessionid = t2.sessionid
  210. )
  211. WHERE rn = 1
  212. )
  213. ,t_return_exposure_3 AS -- 曝光关联回流,用于计算viewh24
  214. (
  215. SELECT *
  216. FROM (
  217. SELECT t1.exposure_id AS exposure_id
  218. ,t1.mid AS mid
  219. ,t1.vid AS vid
  220. ,t1.subsessionid AS subsessionid
  221. ,t1.sessionid AS sessionid
  222. ,t1.headvideoid AS headvideoid
  223. ,t1.dthh
  224. ,t2.id AS return_id
  225. ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn
  226. FROM (
  227. SELECT *
  228. FROM t_return_exposure_2
  229. WHERE return_id IS NULL
  230. ) t1
  231. LEFT JOIN t_return t2
  232. ON t1.mid = t2.mid
  233. AND t1.subsessionid = t2.subsessionid
  234. )
  235. WHERE rn = 1
  236. )
  237. ,t_return_exposure_4 AS -- 曝光关联回流,用于计算viewh24
  238. (
  239. SELECT *
  240. FROM (
  241. SELECT t1.exposure_id AS exposure_id
  242. ,t1.mid AS mid
  243. ,t1.vid AS vid
  244. ,t1.subsessionid AS subsessionid
  245. ,t1.sessionid AS sessionid
  246. ,t1.headvideoid AS headvideoid
  247. ,t1.dthh
  248. ,t2.id AS return_id
  249. ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn
  250. FROM (
  251. SELECT *
  252. FROM t_return_exposure_3
  253. WHERE return_id IS NULL
  254. ) t1
  255. LEFT JOIN t_return t2
  256. ON t1.mid = t2.mid
  257. AND t1.sessionid = t2.sessionid
  258. )
  259. WHERE rn = 1
  260. )
  261. ,t_return_exposure AS
  262. (
  263. SELECT a.*
  264. ,b.exposure_cnt AS new_exposure_cnt
  265. FROM t_return a
  266. LEFT JOIN (
  267. SELECT return_id
  268. ,COUNT(1) AS exposure_cnt
  269. FROM (
  270. SELECT *
  271. FROM t_return_exposure_1
  272. WHERE return_id IS NOT NULL
  273. UNION ALL
  274. SELECT *
  275. FROM t_return_exposure_2
  276. WHERE return_id IS NOT NULL
  277. UNION ALL
  278. SELECT *
  279. FROM t_return_exposure_3
  280. WHERE return_id IS NOT NULL
  281. UNION ALL
  282. SELECT *
  283. FROM t_return_exposure_4
  284. WHERE return_id IS NOT NULL
  285. )
  286. GROUP BY return_id
  287. ) b
  288. ON a.id = b.return_id
  289. )
  290. ,t_normal_share_exposure_1 AS -- 开始处理常规的分享与曝光关联
  291. (
  292. SELECT *
  293. FROM (
  294. SELECT t1.dthh
  295. ,t1.apptype
  296. ,t1.mid
  297. ,t1.vid
  298. ,t1.sessionid
  299. ,t1.subsessionid
  300. ,t1.pagesource
  301. ,t1.shareid
  302. ,t1.ts
  303. ,t2.id AS exposure_id
  304. ,t2.ts AS exposure_ts
  305. ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
  306. FROM t_share_from_sharelog t1
  307. LEFT JOIN t_exposure t2
  308. ON t1.apptype = t2.apptype
  309. AND t1.mid = t2.mid
  310. AND t1.vid = t2.vid
  311. AND t1.subsessionid = t2.subsessionid
  312. AND t1.pagesource = t2.pagesource
  313. AND t1.ts >= t2.ts
  314. WHERE t1.pagesource NOT REGEXP "pages/detail-user-videos-share-recommend$"
  315. )
  316. WHERE rn = 1
  317. )
  318. ,t_normal_share_exposure_2 AS
  319. (
  320. SELECT *
  321. FROM (
  322. SELECT t1.dthh
  323. ,t1.apptype
  324. ,t1.mid
  325. ,t1.vid
  326. ,t1.sessionid
  327. ,t1.subsessionid
  328. ,t1.pagesource
  329. ,t1.shareid
  330. ,t1.ts
  331. ,t2.id AS exposure_id
  332. ,t2.ts AS exposure_ts
  333. ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
  334. FROM (
  335. SELECT *
  336. FROM t_normal_share_exposure_1
  337. WHERE exposure_id IS NULL
  338. ) t1
  339. LEFT JOIN t_exposure t2
  340. ON t1.apptype = t2.apptype
  341. AND t1.mid = t2.mid
  342. AND t1.vid = t2.vid
  343. AND t1.sessionid = t2.sessionid
  344. AND t1.pagesource = t2.pagesource
  345. AND t1.ts >= t2.ts
  346. )
  347. WHERE rn = 1
  348. )
  349. ,t_normal_share_exposure_3 AS
  350. (
  351. SELECT *
  352. FROM (
  353. SELECT t1.dthh
  354. ,t1.apptype
  355. ,t1.mid
  356. ,t1.vid
  357. ,t1.sessionid
  358. ,t1.subsessionid
  359. ,t1.pagesource
  360. ,t1.shareid
  361. ,t1.ts
  362. ,t2.id AS exposure_id
  363. ,t2.ts AS exposure_ts
  364. ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
  365. FROM (
  366. SELECT *
  367. FROM t_normal_share_exposure_2
  368. WHERE exposure_id IS NULL
  369. ) t1
  370. LEFT JOIN t_exposure t2
  371. ON t1.apptype = t2.apptype
  372. AND t1.mid = t2.mid
  373. AND t1.vid = t2.vid
  374. AND t1.subsessionid = t2.subsessionid
  375. AND t1.pagesource = t2.pagesource
  376. )
  377. WHERE rn = 1
  378. )
  379. ,t_normal_share_exposure_4 AS
  380. (
  381. SELECT *
  382. FROM (
  383. SELECT t1.dthh
  384. ,t1.apptype
  385. ,t1.mid
  386. ,t1.vid
  387. ,t1.sessionid
  388. ,t1.subsessionid
  389. ,t1.pagesource
  390. ,t1.shareid
  391. ,t1.ts
  392. ,t2.id AS exposure_id
  393. ,t2.ts AS exposure_ts
  394. ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
  395. FROM (
  396. SELECT *
  397. FROM t_normal_share_exposure_3
  398. WHERE exposure_id IS NULL
  399. ) t1
  400. LEFT JOIN t_exposure t2
  401. ON t1.apptype = t2.apptype
  402. AND t1.mid = t2.mid
  403. AND t1.vid = t2.vid
  404. AND t1.sessionid = t2.sessionid
  405. AND t1.pagesource = t2.pagesource
  406. )
  407. WHERE rn = 1
  408. )
  409. ,t_normal_share_exposure_5 AS
  410. (
  411. SELECT *
  412. FROM (
  413. SELECT t1.dthh
  414. ,t1.apptype
  415. ,t1.mid
  416. ,t1.vid
  417. ,t1.sessionid
  418. ,t1.subsessionid
  419. ,t1.pagesource
  420. ,t1.shareid
  421. ,t1.ts
  422. ,t2.id AS exposure_id
  423. ,t2.ts AS exposure_ts
  424. ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
  425. FROM (
  426. SELECT *
  427. FROM t_normal_share_exposure_4
  428. WHERE exposure_id IS NULL
  429. ) t1
  430. LEFT JOIN t_exposure t2
  431. ON t1.apptype = t2.apptype
  432. AND t1.mid = t2.mid
  433. AND t1.vid = t2.vid
  434. AND t1.subsessionid = t2.subsessionid
  435. )
  436. WHERE rn = 1
  437. )
  438. ,t_normal_share_exposure_6 AS
  439. (
  440. SELECT *
  441. FROM (
  442. SELECT t1.dthh
  443. ,t1.apptype
  444. ,t1.mid
  445. ,t1.vid
  446. ,t1.sessionid
  447. ,t1.subsessionid
  448. ,t1.pagesource
  449. ,t1.shareid
  450. ,t1.ts
  451. ,t2.id AS exposure_id
  452. ,t2.ts AS exposure_ts
  453. ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
  454. FROM (
  455. SELECT *
  456. FROM t_normal_share_exposure_5
  457. WHERE exposure_id IS NULL
  458. ) t1
  459. LEFT JOIN t_exposure t2
  460. ON t1.apptype = t2.apptype
  461. AND t1.mid = t2.mid
  462. AND t1.vid = t2.vid
  463. AND t1.sessionid = t2.sessionid
  464. )
  465. WHERE rn = 1
  466. )
  467. ,t_exposure_detail AS
  468. (
  469. SELECT *
  470. FROM t_exposure
  471. WHERE pagesource REGEXP "-pages/user-videos-detail$|pages/detail-recommend$"
  472. )
  473. ,t_no_normal_share_exposure_1 AS -- 开始处理非常规的分享与曝光关联
  474. (
  475. SELECT *
  476. FROM (
  477. SELECT t1.dthh
  478. ,t1.apptype
  479. ,t1.mid
  480. ,t1.vid
  481. ,t1.sessionid
  482. ,t1.subsessionid
  483. ,t1.pagesource
  484. ,t1.shareid
  485. ,t1.ts
  486. ,t2.id AS exposure_id
  487. ,t2.ts AS exposure_ts
  488. ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
  489. FROM t_share_from_sharelog t1
  490. LEFT JOIN t_exposure_detail t2
  491. ON t1.apptype = t2.apptype
  492. AND t1.mid = t2.mid
  493. AND t1.vid = t2.vid
  494. AND t1.subsessionid = t2.subsessionid
  495. AND t1.ts >= t2.ts
  496. WHERE t1.pagesource REGEXP "pages/detail-user-videos-share-recommend$"
  497. )
  498. WHERE rn = 1
  499. )
  500. ,t_no_normal_share_exposure_2 AS
  501. (
  502. SELECT *
  503. FROM (
  504. SELECT t1.dthh
  505. ,t1.apptype
  506. ,t1.mid
  507. ,t1.vid
  508. ,t1.sessionid
  509. ,t1.subsessionid
  510. ,t1.pagesource
  511. ,t1.shareid
  512. ,t1.ts
  513. ,t2.id AS exposure_id
  514. ,t2.ts AS exposure_ts
  515. ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
  516. FROM (
  517. SELECT *
  518. FROM t_no_normal_share_exposure_1
  519. WHERE exposure_id IS NULL
  520. ) t1
  521. LEFT JOIN t_exposure_detail t2
  522. ON t1.apptype = t2.apptype
  523. AND t1.mid = t2.mid
  524. AND t1.vid = t2.vid
  525. AND t1.sessionid = t2.sessionid
  526. AND t1.ts >= t2.ts
  527. )
  528. WHERE rn = 1
  529. )
  530. ,t_no_normal_share_exposure_3 AS
  531. (
  532. SELECT *
  533. FROM (
  534. SELECT t1.dthh
  535. ,t1.apptype
  536. ,t1.mid
  537. ,t1.vid
  538. ,t1.sessionid
  539. ,t1.subsessionid
  540. ,t1.pagesource
  541. ,t1.shareid
  542. ,t1.ts
  543. ,t2.id AS exposure_id
  544. ,t2.ts AS exposure_ts
  545. ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
  546. FROM (
  547. SELECT *
  548. FROM t_no_normal_share_exposure_2
  549. WHERE exposure_id IS NULL
  550. ) t1
  551. LEFT JOIN t_exposure_detail t2
  552. ON t1.apptype = t2.apptype
  553. AND t1.mid = t2.mid
  554. AND t1.vid = t2.vid
  555. AND t1.subsessionid = t2.subsessionid
  556. )
  557. WHERE rn = 1
  558. )
  559. ,t_no_normal_share_exposure_4 AS
  560. (
  561. SELECT *
  562. FROM (
  563. SELECT t1.dthh
  564. ,t1.apptype
  565. ,t1.mid
  566. ,t1.vid
  567. ,t1.sessionid
  568. ,t1.subsessionid
  569. ,t1.pagesource
  570. ,t1.shareid
  571. ,t1.ts
  572. ,t2.id AS exposure_id
  573. ,t2.ts AS exposure_ts
  574. ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
  575. FROM (
  576. SELECT *
  577. FROM t_no_normal_share_exposure_3
  578. WHERE exposure_id IS NULL
  579. ) t1
  580. LEFT JOIN t_exposure_detail t2
  581. ON t1.apptype = t2.apptype
  582. AND t1.mid = t2.mid
  583. AND t1.vid = t2.vid
  584. AND t1.sessionid = t2.sessionid
  585. )
  586. WHERE rn = 1
  587. )
  588. ,t_share_exposure AS
  589. (
  590. SELECT *
  591. FROM t_normal_share_exposure_1
  592. WHERE exposure_id IS NOT NULL
  593. UNION ALL
  594. SELECT *
  595. FROM t_normal_share_exposure_2
  596. WHERE exposure_id IS NOT NULL
  597. UNION ALL
  598. SELECT *
  599. FROM t_normal_share_exposure_3
  600. WHERE exposure_id IS NOT NULL
  601. UNION ALL
  602. SELECT *
  603. FROM t_normal_share_exposure_4
  604. WHERE exposure_id IS NOT NULL
  605. UNION ALL
  606. SELECT *
  607. FROM t_normal_share_exposure_5
  608. WHERE exposure_id IS NOT NULL
  609. UNION ALL
  610. SELECT *
  611. FROM t_normal_share_exposure_6
  612. UNION ALL
  613. SELECT *
  614. FROM t_no_normal_share_exposure_1
  615. WHERE exposure_id IS NOT NULL
  616. UNION ALL
  617. SELECT *
  618. FROM t_no_normal_share_exposure_2
  619. WHERE exposure_id IS NOT NULL
  620. UNION ALL
  621. SELECT *
  622. FROM t_no_normal_share_exposure_3
  623. WHERE exposure_id IS NOT NULL
  624. UNION ALL
  625. SELECT *
  626. FROM t_no_normal_share_exposure_4
  627. )
  628. ,t_share_with_label AS
  629. (
  630. SELECT a.dthh
  631. ,a.apptype -- join 条件
  632. ,a.mid
  633. ,a.vid -- join 条件
  634. ,a.sessionid
  635. ,a.subsessionid
  636. ,a.pagesource
  637. ,a.shareid -- join 条件
  638. ,a.ts
  639. ,a.exposure_id
  640. ,COALESCE(b.return_1_pv,0) AS return_1_pv
  641. ,COALESCE(b.return_1_uv,0) AS return_1_uv
  642. ,b.return_1_mids AS return_1_mids -- 可能为null,再决策是否提前处理。
  643. ,COALESCE(c.return_n_pv,0) AS return_n_pv
  644. ,COALESCE(c.return_n_uv,0) AS return_n_uv
  645. ,c.return_n_mids AS return_n_mids -- 可能为null,再决策是否提前处理。
  646. ,COALESCE(c.new_exposure_cnt,0) AS new_exposure_cnt
  647. FROM t_share_exposure a
  648. LEFT JOIN (
  649. SELECT shareid
  650. ,vid
  651. ,apptype
  652. ,COUNT(1) AS return_1_pv
  653. ,COUNT(DISTINCT mid) AS return_1_uv
  654. ,CONCAT_WS(',',COLLECT_SET(mid)) AS return_1_mids
  655. FROM t_return
  656. GROUP BY shareid
  657. ,vid
  658. ,apptype
  659. ) b
  660. ON a.shareid = b.shareid
  661. AND a.vid = b.vid
  662. AND a.apptype = b.apptype
  663. LEFT JOIN (
  664. SELECT rootshareid
  665. ,vid
  666. ,apptype
  667. ,COUNT(1) AS return_n_pv
  668. ,COUNT(DISTINCT mid) AS return_n_uv
  669. ,CONCAT_WS(',',COLLECT_SET(mid)) AS return_n_mids
  670. ,SUM(new_exposure_cnt) AS new_exposure_cnt
  671. FROM t_return_exposure
  672. GROUP BY rootshareid
  673. ,vid
  674. ,apptype
  675. ) c
  676. ON a.shareid = c.rootshareid
  677. AND a.vid = c.vid
  678. AND a.apptype = c.apptype
  679. )
  680. ,t_share_with_label_group AS
  681. (
  682. SELECT exposure_id
  683. ,COUNT(1) AS share_cnt
  684. ,SUM(return_1_pv) AS return_1_pv
  685. ,COALESCE(SIZE(SPLIT(DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_1_mids))),",")),0) AS return_1_uv
  686. ,DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_1_mids))) AS return_1_mids -- 可能是null
  687. ,SUM(return_n_pv) AS return_n_pv
  688. ,COALESCE(SIZE(SPLIT(DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_n_mids))),",")),0) AS return_n_uv
  689. ,DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_n_mids))) AS return_n_mids -- 可能是null
  690. ,SUM(new_exposure_cnt) AS new_exposure_cnt
  691. FROM t_share_with_label
  692. GROUP BY exposure_id
  693. )
  694. ,t_root_source_id_group_name AS
  695. (
  696. SELECT *
  697. FROM (
  698. SELECT root_source_id
  699. ,group_name
  700. ,ROW_NUMBER() OVER (PARTITION BY root_source_id ) AS rn
  701. FROM loghubods.changwen_rootsourceid_group_hour
  702. WHERE dt = MAX_PT('loghubods.changwen_rootsourceid_group_hour')
  703. )
  704. WHERE rn = 1
  705. )
  706. ,t_exposure_share_return AS
  707. (
  708. SELECT apptype
  709. ,uid
  710. ,mid
  711. ,vid
  712. ,sessionid
  713. ,subsessionid
  714. ,pagesource
  715. ,CASE WHEN pagesource REGEXP 'pages/user-videos-share-recommend$' THEN '回流后沉浸页&内页feed'
  716. WHEN pagesource REGEXP 'pages/detail-recommend$' THEN '详情后沉浸页'
  717. WHEN pagesource REGEXP 'pages/user-videos-share$' THEN '回流页'
  718. WHEN pagesource REGEXP 'pages/user-videos-detail$' THEN '详情页'
  719. WHEN pagesource REGEXP 'pages/category$' THEN '首页feed'
  720. ELSE '其他'
  721. END AS pagesource_new
  722. ,recommendlogvo -- 推荐算法的返回结果日志存在这个字段中
  723. ,abcode -- 推荐算法的ab分组
  724. ,recommendpagetype -- 三种回流头部;两种下滑-沉浸页下滑和feed下滑
  725. ,recomtraceid
  726. ,headvideoid
  727. ,rootsourceid
  728. ,hotsencetype
  729. ,flowpool -- 14#68#3#1735262438476#2
  730. ,level
  731. ,clientip
  732. ,machineinfo_brand
  733. ,machineinfo_model
  734. ,machineinfo_system
  735. ,machineinfo_wechatversion
  736. ,machineinfo_sdkversion
  737. ,province
  738. ,city
  739. ,ts
  740. ,IF(COALESCE(share_cnt,0) > 0,1,0) AS is_share
  741. ,COALESCE(share_cnt,0) AS share_cnt
  742. ,IF(COALESCE(return_1_uv,0) > 0,1,0) AS is_return_1
  743. ,COALESCE(return_1_pv,0) AS return_1_pv
  744. ,COALESCE(return_1_uv,0) AS return_1_uv
  745. ,return_1_mids -- 可能是null
  746. ,IF(COALESCE(return_n_pv,0) > 0,1,0) AS is_return_n
  747. ,COALESCE(return_n_pv,0) AS return_n_pv
  748. ,COALESCE(return_n_uv,0) AS return_n_uv
  749. ,return_n_mids -- 可能是null
  750. ,IF(COALESCE(COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid)),0),0) > 0,1,0) AS is_return_noself
  751. ,COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid)),0) AS return_1_uv_noself
  752. ,ARRAY_JOIN(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid),",") AS return_1_mids_noself
  753. ,IF(COALESCE(COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid)),0),0) > 0,1,0) AS is_return_n_noself
  754. ,COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid)),0) AS return_n_uv_noself
  755. ,ARRAY_JOIN(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid),",") AS return_n_mids_noself
  756. ,COALESCE(new_exposure_cnt) AS new_exposure_cnt
  757. ,JSON_FORMAT(
  758. JSON_OBJECT("animationSceneType",animationSceneType,"extParams",extParams,"rootsessionid",rootsessionid_new,"versioncode",versioncode,"group_name",tc.group_name)
  759. ) AS extend
  760. ,SUBSTR(dthh,1,8) AS dt
  761. ,SUBSTR(dthh,9,2) AS hh
  762. FROM t_exposure ta
  763. LEFT JOIN t_share_with_label_group tb
  764. ON ta.id = tb.exposure_id
  765. LEFT JOIN t_root_source_id_group_name tc
  766. ON ta.rootsourceid = tc.root_source_id
  767. )SELECT *
  768. FROM t_exposure_share_return
  769. ;