|
@@ -5,6 +5,7 @@ import json
|
|
|
from pathlib import Path
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
from agent import tool
|
|
from agent import tool
|
|
|
|
|
+from examples.demand.mysql import mysql_db
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_odps_data(sql):
|
|
def get_odps_data(sql):
|
|
@@ -79,17 +80,24 @@ def get_changwen_weight(account_name):
|
|
|
bizdatemin = bizdatemin_date.strftime("%Y%m%d")
|
|
bizdatemin = bizdatemin_date.strftime("%Y%m%d")
|
|
|
|
|
|
|
|
sql_query = f'''
|
|
sql_query = f'''
|
|
|
- SELECT 公众号名
|
|
|
|
|
|
|
+SELECT
|
|
|
|
|
+ 公众号名
|
|
|
,videoid
|
|
,videoid
|
|
|
,一级品类
|
|
,一级品类
|
|
|
,二级品类
|
|
,二级品类
|
|
|
,头部曝光
|
|
,头部曝光
|
|
|
|
|
+ ,头部曝光uv
|
|
|
,头部realplay
|
|
,头部realplay
|
|
|
|
|
+ ,头部realplay_uv
|
|
|
,头部分享
|
|
,头部分享
|
|
|
|
|
+ ,头部分享uv
|
|
|
,头部回流人数 AS 头部回流数
|
|
,头部回流人数 AS 头部回流数
|
|
|
,推荐曝光数
|
|
,推荐曝光数
|
|
|
|
|
+ ,当日分发曝光uv
|
|
|
,推荐realplay
|
|
,推荐realplay
|
|
|
|
|
+ ,分发realplay_uv
|
|
|
,推荐分享数
|
|
,推荐分享数
|
|
|
|
|
+ ,当日分发分享uv
|
|
|
,推荐回流数
|
|
,推荐回流数
|
|
|
,当日回流进入分发曝光次数 AS vov分子
|
|
,当日回流进入分发曝光次数 AS vov分子
|
|
|
FROM (
|
|
FROM (
|
|
@@ -116,6 +124,12 @@ FROM (
|
|
|
,NVL(b.当日回流进入分发曝光次数,0) AS 当日回流进入分发曝光次数
|
|
,NVL(b.当日回流进入分发曝光次数,0) AS 当日回流进入分发曝光次数
|
|
|
,NVL(b.当日回流进入分发曝光次数,0) / a.当日分发曝光pv AS vov分子
|
|
,NVL(b.当日回流进入分发曝光次数,0) / a.当日分发曝光pv AS vov分子
|
|
|
,d.头部回流人数
|
|
,d.头部回流人数
|
|
|
|
|
+ ,当日分发曝光uv
|
|
|
|
|
+ ,头部曝光uv
|
|
|
|
|
+ ,当日分发分享uv
|
|
|
|
|
+ ,头部分享uv
|
|
|
|
|
+ ,分发realplay_uv
|
|
|
|
|
+ ,头部realplay_uv
|
|
|
FROM (
|
|
FROM (
|
|
|
SELECT account_name AS 公众号名
|
|
SELECT account_name AS 公众号名
|
|
|
,videoid
|
|
,videoid
|
|
@@ -124,9 +138,15 @@ FROM (
|
|
|
,COUNT(
|
|
,COUNT(
|
|
|
CASE WHEN pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND businesstype = 'videoView' THEN mid END
|
|
CASE WHEN pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND businesstype = 'videoView' THEN mid END
|
|
|
) AS 当日分发曝光pv
|
|
) AS 当日分发曝光pv
|
|
|
|
|
+ ,COUNT(DISTINCT
|
|
|
|
|
+ CASE WHEN pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND businesstype = 'videoView' THEN mid END
|
|
|
|
|
+ ) AS 当日分发曝光uv
|
|
|
,COUNT(
|
|
,COUNT(
|
|
|
CASE WHEN pagesource REGEXP 'pages/user-videos-share$' AND businesstype = 'videoView' THEN mid END
|
|
CASE WHEN pagesource REGEXP 'pages/user-videos-share$' AND businesstype = 'videoView' THEN mid END
|
|
|
) AS 头部曝光pv
|
|
) AS 头部曝光pv
|
|
|
|
|
+ ,COUNT(DISTINCT
|
|
|
|
|
+ CASE WHEN pagesource REGEXP 'pages/user-videos-share$' AND businesstype = 'videoView' THEN mid END
|
|
|
|
|
+ ) AS 头部曝光uv
|
|
|
,COUNT(
|
|
,COUNT(
|
|
|
CASE WHEN pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND businesstype = 'videoPlay' THEN mid END
|
|
CASE WHEN pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND businesstype = 'videoPlay' THEN mid END
|
|
|
) AS 当日分发播放pv
|
|
) AS 当日分发播放pv
|
|
@@ -136,9 +156,15 @@ FROM (
|
|
|
,COUNT(
|
|
,COUNT(
|
|
|
CASE WHEN pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND businesstype = 'videoShareFriend' THEN mid END
|
|
CASE WHEN pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND businesstype = 'videoShareFriend' THEN mid END
|
|
|
) AS 当日分发分享pv
|
|
) AS 当日分发分享pv
|
|
|
|
|
+ ,COUNT(DISTINCT
|
|
|
|
|
+ CASE WHEN pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND businesstype = 'videoShareFriend' THEN mid END
|
|
|
|
|
+ ) AS 当日分发分享uv
|
|
|
,COUNT(
|
|
,COUNT(
|
|
|
CASE WHEN pagesource REGEXP 'pages/user-videos-share$' AND businesstype = 'videoShareFriend' THEN mid END
|
|
CASE WHEN pagesource REGEXP 'pages/user-videos-share$' AND businesstype = 'videoShareFriend' THEN mid END
|
|
|
) AS 头部分享pv
|
|
) AS 头部分享pv
|
|
|
|
|
+ ,COUNT(DISTINCT
|
|
|
|
|
+ CASE WHEN pagesource REGEXP 'pages/user-videos-share$' AND businesstype = 'videoShareFriend' THEN mid END
|
|
|
|
|
+ ) AS 头部分享uv
|
|
|
FROM (
|
|
FROM (
|
|
|
SELECT DISTINCT a.mid
|
|
SELECT DISTINCT a.mid
|
|
|
,a.videoid
|
|
,a.videoid
|
|
@@ -238,12 +264,16 @@ FROM (
|
|
|
CASE WHEN a.pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' THEN a.mid END
|
|
CASE WHEN a.pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' THEN a.mid END
|
|
|
) AS 分发realplay_pv
|
|
) AS 分发realplay_pv
|
|
|
,COUNT(CASE WHEN a.pagesource REGEXP 'pages/user-videos-share$' THEN a.mid END) AS 头部realplay_pv
|
|
,COUNT(CASE WHEN a.pagesource REGEXP 'pages/user-videos-share$' THEN a.mid END) AS 头部realplay_pv
|
|
|
|
|
+ ,COUNT(DISTINCT
|
|
|
|
|
+ CASE WHEN a.pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' THEN a.mid END
|
|
|
|
|
+ ) AS 分发realplay_uv
|
|
|
|
|
+ ,COUNT(DISTINCT CASE WHEN a.pagesource REGEXP 'pages/user-videos-share$' THEN a.mid END) AS 头部realplay_uv
|
|
|
FROM loghubods.ods_video_play_log_day a
|
|
FROM loghubods.ods_video_play_log_day a
|
|
|
LEFT JOIN (
|
|
LEFT JOIN (
|
|
|
SELECT DISTINCT open_id
|
|
SELECT DISTINCT open_id
|
|
|
,union_id
|
|
,union_id
|
|
|
FROM loghubods.user_wechat_identity_info_ha
|
|
FROM loghubods.user_wechat_identity_info_ha
|
|
|
- WHERE dt = MAX_PT("loghubods.user_wechat_identity_info_ha")
|
|
|
|
|
|
|
+ WHERE dt = MAX_PT("loghubods.user_wechat_identity_info_ha")
|
|
|
) b
|
|
) b
|
|
|
ON a.mid = CONCAT('weixin_openid_',b.open_id)
|
|
ON a.mid = CONCAT('weixin_openid_',b.open_id)
|
|
|
LEFT JOIN loghubods.gzh_fans_info d
|
|
LEFT JOIN loghubods.gzh_fans_info d
|
|
@@ -251,7 +281,7 @@ FROM (
|
|
|
AND d.dt = MAX_PT("loghubods.gzh_fans_info")
|
|
AND d.dt = MAX_PT("loghubods.gzh_fans_info")
|
|
|
WHERE a.dt >= '{bizdatemin}'
|
|
WHERE a.dt >= '{bizdatemin}'
|
|
|
AND a.dt <= '{bizdatemax}'
|
|
AND a.dt <= '{bizdatemax}'
|
|
|
- AND a.businesstype = 'videoRealPlay'
|
|
|
|
|
|
|
+ AND a.businesstype = 'videoRealPlay'
|
|
|
AND d.user_create_time IS NOT NULL
|
|
AND d.user_create_time IS NOT NULL
|
|
|
AND d.account_name = '{account_name}'
|
|
AND d.account_name = '{account_name}'
|
|
|
GROUP BY d.account_name
|
|
GROUP BY d.account_name
|
|
@@ -315,14 +345,20 @@ ORDER BY 推荐曝光数 DESC
|
|
|
"二级品类": r[3],
|
|
"二级品类": r[3],
|
|
|
"ext_data": {
|
|
"ext_data": {
|
|
|
"头部曝光": r[4],
|
|
"头部曝光": r[4],
|
|
|
- "头部realplay": r[5],
|
|
|
|
|
- "头部分享": r[6],
|
|
|
|
|
- "头部回流数": r[7],
|
|
|
|
|
- "推荐曝光数": r[8],
|
|
|
|
|
- "推荐realplay": r[9],
|
|
|
|
|
- "推荐分享数": r[10],
|
|
|
|
|
- "推荐回流数": r[11],
|
|
|
|
|
- "vov分子": r[12],
|
|
|
|
|
|
|
+ "头部曝光uv": r[5],
|
|
|
|
|
+ "头部realplay": r[6],
|
|
|
|
|
+ "头部realplay_uv": r[7],
|
|
|
|
|
+ "头部分享": r[8],
|
|
|
|
|
+ "头部分享uv": r[9],
|
|
|
|
|
+ "头部回流数": r[10],
|
|
|
|
|
+ "推荐曝光数": r[11],
|
|
|
|
|
+ "当日分发曝光uv": r[12],
|
|
|
|
|
+ "推荐realplay": r[13],
|
|
|
|
|
+ "分发realplay_uv": r[14],
|
|
|
|
|
+ "推荐分享数": r[15],
|
|
|
|
|
+ "当日分发分享uv": r[16],
|
|
|
|
|
+ "推荐回流数": r[17],
|
|
|
|
|
+ "vov分子": r[18],
|
|
|
},
|
|
},
|
|
|
}
|
|
}
|
|
|
)
|
|
)
|
|
@@ -337,6 +373,82 @@ ORDER BY 推荐曝光数 DESC
|
|
|
return result_list
|
|
return result_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_merge_leve2_by_video_ids(video_ids, batch_size=2000):
    """Look up ``merge_leve2`` for a collection of video ids.

    The ids are queried from ``loghubods.video_merge_tag`` through
    ``get_odps_data`` in batches, so a single ``IN (...)`` list never holds
    more than ``batch_size`` entries.

    Args:
        video_ids: Iterable of video ids (anything ``str()`` accepts).
            ``None`` entries are ignored and duplicates are queried once.
        batch_size: Maximum number of ids per query; must be positive.

    Returns:
        dict mapping ``str(videoid)`` to the ``merge_leve2`` value of the
        returned row. Ids for which no row came back are absent. If several
        rows share a videoid, the last one returned wins.

    Raises:
        ValueError: If ``batch_size`` is not positive and there is at least
            one id argument to process.
    """
    result = {}
    if not video_ids:
        return result

    # A non-positive step would either raise an opaque range() error (0) or
    # silently skip every id (negative), so reject it explicitly.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # same id is never sent to ODPS twice.
    normalized_ids = list(
        dict.fromkeys(str(video_id) for video_id in video_ids if video_id is not None)
    )

    for start in range(0, len(normalized_ids), batch_size):
        batch_ids = normalized_ids[start:start + batch_size]
        # get_odps_data only accepts a SQL string, so the ids are inlined as
        # quoted literals with single quotes doubled.
        # NOTE(review): only single quotes are escaped here; confirm that is
        # sufficient for the ODPS SQL dialect (e.g. backslashes).
        video_ids_in_clause = ", ".join(
            "'{}'".format(video_id.replace("'", "''")) for video_id in batch_ids
        )
        sql_query = f'''
        SELECT videoid, merge_leve2
        FROM loghubods.video_merge_tag
        WHERE videoid IN ({video_ids_in_clause})
        '''
        data = get_odps_data(sql_query)
        if not data:
            continue

        for row in data:
            result[str(row[0])] = row[1]

    return result
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def get_all_decode_task_result_rows():
    """Select ``id``, ``channel_content_id`` and ``merge_leve2`` from
    ``workflow_decode_task_result`` with no filter.

    Returns:
        Whatever ``mysql_db.select`` returns for that query.
        NOTE(review): presumably a sequence of dict-like rows; confirm
        against the ``mysql_db`` helper.
    """
    table_name = "workflow_decode_task_result"
    wanted_columns = "id, channel_content_id, merge_leve2"
    return mysql_db.select(table_name, columns=wanted_columns)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def update_decode_task_result_merge_leve2(channel_content_id, merge_leve2):
    """Set ``merge_leve2`` on rows of ``workflow_decode_task_result``.

    All rows whose ``channel_content_id`` equals ``str(channel_content_id)``
    are targeted by the update.

    Args:
        channel_content_id: Id used in the WHERE clause (stringified).
        merge_leve2: Value to store (stringified).

    Returns:
        The result of ``mysql_db.update``, or ``0`` when either argument is
        ``None`` and no update is issued.
        NOTE(review): the update result is presumably an affected-row count;
        confirm against the ``mysql_db`` helper.
    """
    # str(None) would store the literal text "None" (or match on it), so
    # refuse to touch the database for missing values.
    if channel_content_id is None or merge_leve2 is None:
        return 0

    return mysql_db.update(
        "workflow_decode_task_result",
        {"merge_leve2": str(merge_leve2)},
        "channel_content_id = %s",
        (str(channel_content_id),),
    )
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def backfill_merge_leve2_for_decode_task_result():
    """Backfill ``merge_leve2`` on ``workflow_decode_task_result`` rows.

    Steps:
      1. Load all rows via ``get_all_decode_task_result_rows``.
      2. Keep each distinct ``channel_content_id`` whose string form is at
         most 8 characters; rows with a missing or longer id are counted as
         skipped.
      3. Resolve ``merge_leve2`` for those ids with
         ``get_merge_leve2_by_video_ids``.
      4. Issue one update per distinct id that has a non-empty value.

    Returns:
        dict with ``total`` (rows loaded), ``updated`` (sum of the positive
        results returned by the update helper) and ``skipped`` (rows rejected
        in step 2). Ids with no ``merge_leve2`` found are not counted as
        skipped.
    """
    # NOTE(review): ids longer than this are not treated as video ids;
    # confirm the limit with the data owner.
    max_content_id_length = 8

    # Tolerate a failed/empty select that yields None instead of a sequence.
    rows = get_all_decode_task_result_rows() or []

    skipped_count = 0
    # A dict is used as an ordered set: the update is keyed on
    # channel_content_id, so each distinct id needs only one UPDATE.
    distinct_content_ids = {}
    for row in rows:
        raw_content_id = row.get("channel_content_id")
        if raw_content_id is None:
            skipped_count += 1
            continue

        content_id = str(raw_content_id)
        if len(content_id) > max_content_id_length:
            skipped_count += 1
            continue

        distinct_content_ids[content_id] = None

    valid_content_ids = list(distinct_content_ids)
    merge_leve2_map = get_merge_leve2_by_video_ids(valid_content_ids, batch_size=2000)

    updated_count = 0
    for content_id in valid_content_ids:
        merge_leve2 = merge_leve2_map.get(content_id)
        if not merge_leve2:
            continue
        affected = update_decode_task_result_merge_leve2(content_id, merge_leve2)
        # Guard against a None result before comparing.
        if affected and affected > 0:
            updated_count += affected

    return {
        "total": len(rows),
        "updated": updated_count,
        "skipped": skipped_count,
    }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Run the backfill and print its summary so a manual run shows
    # how many rows were seen, updated and skipped.
    backfill_summary = backfill_merge_leve2_for_decode_task_result()
    print(backfill_summary)
|