|
@@ -0,0 +1,133 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+import traceback
|
|
|
+from config import set_config
|
|
|
+from log import Log
|
|
|
+from utils import execute_sql_from_odps
|
|
|
+from db_helper import RedisHelper
|
|
|
+import datetime
|
|
|
+from alg_recsys_recall_4h_region_trend import records_process_for_list
|
|
|
# Shared, module-level collaborators created once at import time.
config_, _ = set_config()
log_ = Log()
redis_helper = RedisHelper()

# ODPS project the query runs in, and the table the tags are read from.
PROJECT = "videoods"
TABLE = "{}.dim_video".format(PROJECT)

# Redis keys are built as REDIS_PREFIX + str(video_id).
REDIS_PREFIX = "alg_recsys_video_tags_"
|
|
|
+
|
|
|
# Whitelist of video tags (festivals, memorial days, solar terms, greetings
# and risk markers). get_video_tags() keeps only the tags found in this set.
#
# A set literal is required here: set() accepts at most ONE iterable
# argument, so calling set('a', 'b', ...) raises TypeError at import time.
TAG_SET = {
    '元旦', '腊八节', '小年', '除夕', '春节', '情人节', '元宵节', '龙抬头',
    '妇女节', '劳动节', '母亲节',
    '儿童节', '端午节', '父亲节', '建党节', '七七事变', '建军节', '七夕节',
    '中元节', '中秋节', '毛主席逝世',
    '国庆节', '重阳节', '感恩节', '公祭日', '平安夜', '圣诞节', '毛主席诞辰',
    '小寒', '大寒', '立春', '雨水',
    '惊蛰', '春分', '清明', '谷雨', '立夏', '小满', '芒种', '夏至', '小暑',
    '大暑', '立秋', '处暑', '白露', '秋分',
    '寒露', '霜降', '立冬', '小雪', '大雪', '冬至',
    '早上好', '中午好', '下午好', '晚上好', '晚安', '祝福',
    'P1高风险', 'P0高风险',
}
|
|
|
+
|
|
|
def get_video_tags():
    """Fetch the whitelisted tags of videos from ODPS and store them in Redis.

    Queries ``TABLE`` for every row whose comma-separated ``tags`` column
    contains at least one entry of ``TAG_SET``. For each row, the tags are
    reduced to the whitelisted ones (original order kept, comma-joined) and a
    ``{"video_id": ..., "tags": ...}`` dict is collected. The collected rows
    are then handed to ``records_process_for_list`` with ``process_and_store``
    as the per-row handler.

    Any exception raised along the way is logged with its traceback and
    swallowed; the function returns ``None``.
    """
    try:
        # Generate the predicate from TAG_SET so that the SQL filter and the
        # Python-side filter below share a single source of truth. The tags
        # are trusted in-file constants; sorting keeps the SQL deterministic.
        tag_literals = ",".join("'{}'".format(tag) for tag in sorted(TAG_SET))
        sql = (
            "SELECT videoid, tags "
            "FROM {} "
            "LATERAL VIEW EXPLODE(SPLIT(tags,',')) exploded AS exploded_value "
            "WHERE tags IS NOT NULL "
            "AND exploded_value IN ({})"
        ).format(TABLE, tag_literals)
        log_.info("sql:" + sql)
        records = execute_sql_from_odps(project=PROJECT, sql=sql)

        video_tags_list = []
        # The LATERAL VIEW yields one identical (videoid, tags) row per
        # matching tag; remember what was already collected so that each
        # distinct pair is written to Redis only once.
        seen = set()
        with records.open_reader() as reader:
            for record in reader:
                video_id = int(record['videoid'])
                tags = ",".join(
                    tag for tag in str(record['tags']).split(",")
                    if tag in TAG_SET
                )
                if (video_id, tags) in seen:
                    continue
                seen.add((video_id, tags))
                video_tags_list.append({"video_id": video_id, "tags": tags})
                log_.info("{}:{}".format(video_id, tags))

        records_process_for_list(
            video_tags_list, process_and_store, max_size=10, num_workers=5
        )

    except Exception as e:
        # Top-level boundary of the job: log everything, never propagate.
        log_.error(str(e) + str(traceback.format_exc()))
|
|
|
+
|
|
|
def process_and_store(row):
    """Store one video's tag string in Redis and log the write.

    ``row`` must provide the ``"video_id"`` and ``"tags"`` entries. The tags
    are written under the key ``REDIS_PREFIX + str(video_id)`` through
    ``redis_helper.set_data_to_redis`` together with an expiry value.
    """
    redis_key = REDIS_PREFIX + str(row["video_id"])
    tag_value = row["tags"]
    # 2 * 24 * 3600 == 172800; presumably seconds (two days) -- confirm
    # against RedisHelper.set_data_to_redis.
    ttl = 2 * 24 * 3600
    redis_helper.set_data_to_redis(redis_key, tag_value, ttl)
    log_.info("trend-avg写入数据key={},value={}".format(redis_key, tag_value))
|
|
|
+
|
|
|
def main():
    """Run the tag export once, logging a timestamp before and after."""
    time_format = "%Y-%m-%d %H:%M:%S"

    started_at = datetime.datetime.now().strftime(time_format)
    log_.info("开始执行:" + started_at)

    get_video_tags()

    finished_at = datetime.datetime.now().strftime(time_format)
    log_.info("完成执行:" + finished_at)


# Only run the job when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+# cd /root/zhangbo/rov-offline
|
|
|
+# python alg_recsys_recall_shield_videos.py
|