Преглед на файлове

add special-mid & videos data

liqian преди 2 години
родител
ревизия
d3255a5dd9
променени са 5 файла, в които са добавени 154 реда и са изтрити 0 реда
  1. 15 0
      config.py
  2. 76 0
      special_mid_videos_update.py
  3. 7 0
      special_mid_videos_update_task.sh
  4. 49 0
      special_mids_update.py
  5. 7 0
      special_mids_update_task.sh

+ 15 - 0
config.py

@@ -278,6 +278,21 @@ class BaseConfig(object):
     # 最惊奇电影类视频相关推荐列表存放 redis key前缀,完整格式: com.weiqu.movie.relevant.list.item.{videoId}
     MOVIE_RELEVANT_LIST_KEY_NAME_PREFIX = 'com.weiqu.movie.relevant.list.item.'
 
+    # Data sources for special mids and their designated videos:
+    # project name per data kind ('mid' / 'videos')
+    SPECIAL_MID_VIDEOS_PROJECT = {
+        'mid': 'loghubods',
+        'videos': 'loghubods'
+    }
+    # Table name per data kind ('mid' / 'videos')
+    SPECIAL_MID_VIDEOS_TABLE = {
+        'mid': 'shielded_mid',
+        'videos': 'shielded_video_list'
+    }
+
+    # Redis key holding the special-mid update result; full key: 'com.weiqu.video.special.mid'
+    KEY_NAME_SPECIAL_MID = 'com.weiqu.video.special.mid'
+    # Redis key prefix for the designated video list of the special mids;
+    # full format: 'com.weiqu.video.special.videos.item.{date}'
+    KEY_NAME_PREFIX_SPECIAL_VIDEOS = 'com.weiqu.video.special.videos.item.'
+
 
 class DevelopmentConfig(BaseConfig):
     """开发环境配置"""

+ 76 - 0
special_mid_videos_update.py

@@ -0,0 +1,76 @@
+import time
+import datetime
+import pandas as pd
+import math
+import random
+from odps import ODPS
+from threading import Timer
+from get_data import get_data_from_odps
+from db_helper import RedisHelper, MysqlHelper
+from config import set_config
+from log import Log
+from utils import filter_video_status_with_applet_rec
+
+config_, env = set_config()
+log_ = Log()
+
+
+def data_check(project, table, now_date):
+    """检查数据是否准备好"""
+    odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project=project,
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
+        connect_timeout=3000,
+        read_timeout=500000,
+        pool_maxsize=1000,
+        pool_connections=1000
+    )
+
+    try:
+        dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+        sql = f'select * from {project}.{table} where dt = {dt}'
+        with odps.execute_sql(sql=sql).open_reader() as reader:
+            data_count = reader.count
+    except Exception as e:
+        data_count = 0
+    return data_count
+
+
+def get_special_videos(now_date, project, table):
+    """获取特殊mid指定的视频列表"""
+    # 获取videoId
+    dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+    records = get_data_from_odps(date=dt, project=project, table=table)
+    video_id_list = [record['videoid'] for record in records]
+    # 排序合并,随机给定分数
+    final_result = {}
+    for video_id in video_id_list:
+        final_result[int(video_id)] = random.uniform(0, 100)
+    # 写入对应的redis
+    key_name = \
+        f"{config_.KEY_NAME_PREFIX_SPECIAL_VIDEOS}{datetime.datetime.strftime(now_date, '%Y%m%d')}"
+    if len(final_result) > 0:
+        redis_helper = RedisHelper()
+        redis_helper.add_data_with_zset(key_name=key_name, data=final_result, expire_time=2 * 24 * 3600)
+
+
+def h_timer_check():
+    project = config_.SPECIAL_MID_VIDEOS_PROJECT.get('videos')
+    table = config_.SPECIAL_MID_VIDEOS_TABLE.get('videos')
+    now_date = datetime.datetime.today()
+    log_.info(f"now_date: {datetime.datetime.strftime(now_date, '%Y%m%d')}")
+    # 查看当天更新的数据是否已准备好
+    data_count = data_check(project=project, table=table, now_date=now_date)
+    if data_count > 0:
+        log_.info(f'special_videos_count = {data_count}')
+        # 数据准备好,进行更新
+        get_special_videos(now_date=now_date, project=project, table=table)
+    else:
+        # 数据没准备好,1分钟后重新检查
+        Timer(5 * 60, h_timer_check).start()
+
+
+# Script entry point: run the first readiness check immediately.
+if __name__ == '__main__':
+    h_timer_check()

+ 7 - 0
special_mid_videos_update_task.sh

@@ -0,0 +1,7 @@
+source /etc/profile
+echo $ROV_OFFLINE_ENV
+if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
+    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/special_mid_videos_update.py
+elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
+    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/special_mid_videos_update.py
+fi

+ 49 - 0
special_mids_update.py

@@ -0,0 +1,49 @@
+import time
+import datetime
+import pandas as pd
+import math
+import random
+from odps import ODPS
+from threading import Timer
+from get_data import get_data_from_odps
+from db_helper import RedisHelper, MysqlHelper
+from config import set_config
+from log import Log
+from utils import filter_video_status_with_applet_rec
+
+config_, env = set_config()
+log_ = Log()
+
+
+def get_special_mid(now_date, project, table):
+    """获取特殊mid数据"""
+    dt = datetime.datetime.strftime(now_date, '%Y%m%d%H')
+    odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project=project,
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
+        onnect_timeout=3000,
+        read_timeout=500000,
+        pool_maxsize=1000,
+        pool_connections=100
+    )
+    records = odps.read_table(name=table)
+    mid_list = []
+    for record in records:
+        if record['sharemid']:
+            mid_list.append(record['sharemid'])
+        if record['clickmid']:
+            mid_list.append(record['clickmid'])
+    mid_list2 = list(set(mid_list))
+    redis_helper = RedisHelper()
+    redis_helper.add_data_with_set(key_name=config_.KEY_NAME_SPECIAL_MID, values=mid_list2, expire_time=2 * 3600)
+    log_.info(f"dt = {dt}, special_mid_count = {len(mid_list2)}")
+
+
+if __name__ == '__main__':
+    project = config_.SPECIAL_MID_VIDEOS_PROJECT.get('mid')
+    table = config_.SPECIAL_MID_VIDEOS_TABLE.get('mid')
+    now_date = datetime.datetime.today()
+    log_.info(f"now_date: {datetime.datetime.strftime(now_date, '%Y%m%d%H')}")
+    get_special_mid(now_date=now_date, project=project, table=table)

+ 7 - 0
special_mids_update_task.sh

@@ -0,0 +1,7 @@
+source /etc/profile
+echo $ROV_OFFLINE_ENV
+if [[ $ROV_OFFLINE_ENV == 'test' ]]; then
+    cd /data2/rov-offline && /root/anaconda3/bin/python /data2/rov-offline/special_mids_update.py
+elif [[ $ROV_OFFLINE_ENV == 'pro' ]]; then
+    cd /data/rov-offline && /root/anaconda3/bin/python /data/rov-offline/special_mids_update.py
+fi