Browse Source

update i2i

linfan 2 years ago
parent
commit
533de2e965
6 changed files with 798 additions and 0 deletions
  1. 113 0
      calI2I.py
  2. 280 0
      config.py
  3. 307 0
      db_help.py
  4. 54 0
      extract_share_log.py
  5. 25 0
      import_redist.py
  6. 19 0
      run.sh

+ 113 - 0
calI2I.py

@@ -0,0 +1,113 @@
+#coding utf-8
+import sys
+from operator import itemgetter
+import json
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    f = open("./data/user_item_share_"+nowdate)
+    user_item_dict={}
+    item_dict = {}  
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        items = line.strip().split("\t")
+        if len(items)<3:
+            continue
+        key = (items[1],items[2])
+        #print(key)
+        if key not in user_item_dict:
+            user_item_dict[key] = 1
+        else:
+            user_item_dict[key] = user_item_dict[key]+1
+        if items[2] not in item_dict:
+            item_dict[items[2]] = 1
+        else:
+            item_dict[items[2]] = item_dict[items[2]]+1
+    f.close()
+    #((user,item), score)
+    #print(user_item_dict)
+    #2. (uid, [(vid, score)....])
+    user_group_dict = {}
+    for k, v in user_item_dict.items():
+        uid = k[0]
+        vid = k[1]
+        score = v
+        vid_list = []
+        if uid not in user_group_dict:
+            vid_list.append((vid, score))
+            user_group_dict[uid] = vid_list
+        else:
+            vid_list = user_group_dict[uid]
+            vid_list.append((vid, score))
+            user_group_dict[uid] = vid_list
+    #print(user_group_dict)
+    item_pair_dict = {}
+    #3. expand item
+    for k, v_list in user_group_dict.items():
+         v_n = len(v_list)
+         if v_n<2:
+             continue
+         for i in range(v_n):
+             for j in range(1, v_n):
+                if v_list[i][0] == v_list[j][0]:
+                    continue
+                item_key = (v_list[i][0], v_list[j][0])
+                item_score = min(v_list[i][1], v_list[j][1])
+                if item_key not in item_pair_dict:
+                    item_pair_dict[item_key] = item_score
+                else:
+                    item_pair_dict[item_key] = item_pair_dict[item_key]+item_score
+    #print(item_pair_dict)
+    print(len(item_pair_dict))
+    print(len(item_dict))
+    left_pair_num = 0
+    rec_item_dict = {}
+    #4. rec item
+    for k, v in item_pair_dict.items():
+        if v<2:
+            continue
+        left_pair_num+=1
+        item1 = k[0]
+        item2 = k[1]
+        pair_score = v
+        if item1 in item_dict:
+             item_score1 = item_dict[item1]
+             i2i_pro = pair_score/(item_score1+5)
+             rec_list1 = []
+             if item2 not in rec_item_dict:
+                 rec_list1.append((item1, i2i_pro, pair_score, item_score1))
+                 rec_item_dict[item2] = rec_list1
+             else:
+                 rec_list1 = rec_item_dict[item2]
+                 rec_list1.append((item1, i2i_pro, pair_score, item_score1))
+                 rec_item_dict[item2] = rec_list1
+        if item2 in item_dict:
+             item_score2 = item_dict[item2]
+             i2i_pro = pair_score/(item_score2+5)
+             rec_list2 = []
+             if item1 not in rec_item_dict:
+                 rec_list2.append((item2, i2i_pro, pair_score, item_score2))
+                 rec_item_dict[item1] = rec_list2
+             else:
+                 rec_list2 = rec_item_dict[item1]
+                 rec_list2.append((item2, i2i_pro, pair_score, item_score2))
+                 rec_item_dict[item1] = rec_list2          
+     
+    #(item, share_count)
+    print(left_pair_num)
+    #print(rec_item_dict)
+    final_rec_list = []
+    #f = open("rec_result", "w")
+    #5. sorted item_list
+    for k,v in rec_item_dict.items():
+        sorted_v = sorted(v, key=itemgetter(1), reverse=True)
+        final_rec_list.append((k, sorted_v))
+    #print(final_rec_list[:1])
+    #json_str = json.dumps(final_rec_list)
+    with open("./data/rec_result_"+nowdate+".json", "w") as f :
+        json.dump(final_rec_list, f)
+    
+     

+ 280 - 0
config.py

@@ -0,0 +1,280 @@
+import os
+# from log import Log
+# log_ = Log()
+
+class BaseConfig(object):
+    # 产品标识
+    APP_TYPE = {
+        'VLOG': 0,  # vlog
+        'LOVE_LIVE': 4,  # 票圈视频
+        'LONG_VIDEO': 5,  # 内容精选
+        'SHORT_VIDEO': 6,  # 票圈短视频
+        'WAN_NENG_VIDEO': 17,  # 万能影视屋
+        'LAO_HAO_KAN_VIDEO': 18,  # 老好看视频
+        'ZUI_JING_QI': 19,  # 票圈最惊奇
+        'APP': 13,  # 票圈视频APP
+        'PIAO_QUAN_VIDEO_PLUS': 21,  # 票圈视频+
+        'JOURNEY': 22,  # 票圈足迹
+        'BLESSING_YEAR': 3,  # 票圈福年
+    }
+    # ODPS服务配置
+    ODPS_CONFIG = {
+        'ENDPOINT': 'http://service.cn.maxcompute.aliyun.com/api',
+        'ACCESSID': 'LTAIWYUujJAm7CbH',
+        'ACCESSKEY': 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
+    }
+    
+    # 日志服务配置
+    ALIYUN_LOG = {
+        'ENDPOINT': 'cn-hangzhou-intranet.log.aliyuncs.com',
+        'ACCESSID': 'LTAIWYUujJAm7CbH',
+        'ACCESSKEY': 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
+        'PROJECT': 'rov-server',
+    }
+  
+    REDIS_INFO = {
+        'host': 'r-bp1fogs2mflr1ybfot.redis.rds.aliyuncs.com',
+        'port': 6379,
+        'password': 'Wqsd@2019',
+    }
+ 
+    # 小年糕视频redis存储key
+    XNG_KEY_NAME = 'xng:videos'
+    # 特殊地区屏蔽危险视频redis存储key
+    SPECIAL_AREA_LIMIT_KEY_NAME = 'special:area:limit:videos'
+    #24 hour update 
+    PROJECT_REGION_APP_TYPE = 'loghubods'
+    TABLE_REGION_APP_TYPE = 'video_each_hour_update_province_apptype'
+    
+    CITY_CODE = {
+        '广州': '440100', '深圳': '440300', '成都': '510100', '长沙': '430100',
+    }
+    DATA_PARAMS = {
+        'data1': {APP_TYPE['VLOG']: 0},  # vlog
+        'data2': {APP_TYPE['VLOG']: 0.5, APP_TYPE['LONG_VIDEO']: 0.5},  # [vlog, 内容精选]
+        'data3': {APP_TYPE['VLOG']: 0.5, APP_TYPE['LOVE_LIVE']: 0.5},  # [vlog, 票圈视频]
+        'data4': {APP_TYPE['VLOG']: 0.5, APP_TYPE['SHORT_VIDEO']: 0.5},  # [vlog, 票圈短视频]
+        # 'data5': [APP_TYPE['VLOG'], APP_TYPE['ZUI_JING_QI']],  # [vlog, 最惊奇]
+        'data6': {APP_TYPE['VLOG']: 0.25, APP_TYPE['LOVE_LIVE']: 0.25, APP_TYPE['SHORT_VIDEO']: 0.25,
+                  APP_TYPE['LONG_VIDEO']: 0.25},
+        'data7': {APP_TYPE['VLOG']: 0.5, APP_TYPE['APP']: 0.5},  # [vlog, 票圈视频APP]
+        'data8': {APP_TYPE['VLOG']: 0.7, APP_TYPE['LONG_VIDEO']: 0.3},  # [vlog, 内容精选]
+        'data9': {APP_TYPE['VLOG']: 0.3, APP_TYPE['LONG_VIDEO']: 0.7},  # [vlog, 内容精选]
+        'data10': {APP_TYPE['VLOG']: 0.2, APP_TYPE['LOVE_LIVE']: 0.8},  # [vlog, 票圈视频]
+        'data11': {APP_TYPE['VLOG']: 0.3, APP_TYPE['LOVE_LIVE']: 0.7},  # [vlog, 票圈视频]
+        'data12': {APP_TYPE['VLOG']: 0.4, APP_TYPE['SHORT_VIDEO']: 0.6},  # [vlog, 票圈短视频]
+        'data13': {APP_TYPE['VLOG']: 0.3, APP_TYPE['SHORT_VIDEO']: 0.7},  # [vlog, 票圈短视频]
+        'data14': {APP_TYPE['VLOG']: 0.78, APP_TYPE['LOVE_LIVE']: 0.11, APP_TYPE['SHORT_VIDEO']: 0.08,
+                   APP_TYPE['LONG_VIDEO']: 0.03},
+
+    }
+
+    REGION_CODE = {
+        '北京': '110000', '天津': '120000', '河北省': '130000', '山西省': '140000', '内蒙古': '150000',
+        '辽宁省': '210000', '吉林省': '220000', '黑龙江省': '230000',
+        '上海': '310000', '江苏省': '320000', '浙江省': '330000', '安徽省': '340000', '福建省': '350000', '江西省': '360000', '山东省': '370000',
+        '河南省': '410000', '湖北省': '420000', '湖南省': '430000', '广东省': '440000', '广西': '450000', '海南省': '460000',
+        '重庆': '500000',  '四川省': '510000', '贵州省': '520000', '云南省': '530000', '西藏': '540000',
+        '陕西省': '610000', '甘肃省': '620000', '青海省': '630000', '宁夏': '640000', '新疆': '650000',
+        '台湾省': '710000', '香港': '810000', '澳门': '820000',
+        'None': '-1'
+    } 
+     # 屏蔽视频配置实验组
+    SHIELD_CONFIG2 = {
+        REGION_CODE['北京']: [XNG_KEY_NAME, SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        REGION_CODE['None']: [SPECIAL_AREA_LIMIT_KEY_NAME, XNG_KEY_NAME, ],
+        CITY_CODE['广州']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        CITY_CODE['深圳']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        CITY_CODE['成都']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        CITY_CODE['长沙']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
+    }
+ 
+    RULE_PARAMS_REGION_APP_TYPE_48H = {
+        'rule_params': {
+            'rule5': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule2', '48h_rule_key': 'rule1'},
+        },
+        'data_params': {
+            'data1': [APP_TYPE['VLOG'], ],
+        },
+        'params_list': [
+            {'data': 'data1', 'rule': 'rule5'},
+        ],
+    }
+    
+     # 地域分组小时级规则参数
+    RULE_PARAMS_REGION_APP_TYPE = {
+        'rule_params': {
+            # 'rule2': {'view_type': 'video-show', 'platform_return_rate': 0.001, 'region_24h_rule_key': 'rule2'},
+            # 'rule3': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #           'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule2'},
+            'rule4': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3'},
+            # 涉政视频过滤
+            'rule4-1': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                        'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3', 'political_filter': True},
+            # 特殊地域屏蔽危险视频
+            'rule4-2': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                        'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3', 'shield_config': SHIELD_CONFIG2},
+
+            # 'rule6': {'view_type': 'preview', 'platform_return_rate': 0.001,
+            #           'region_24h_rule_key': 'rule3', '24h_rule_key': 'rule2'},
+            'rule7': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2},
+            'rule7-1': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                        'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2,
+                        'political_filter': True},
+            'rule8': {'view_type': 'preview', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule5', '24h_rule_key': 'rule4', 'merge_func': 2},
+            # 'rule9': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #           'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3', '30day_rule_key': 'rule1'},
+            # # 无回流人群
+            # 'rule10': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #            'region_24h_rule_key': 'rule6', '24h_rule_key': 'rule5', 'click_score_rate': 0.7},
+            # 'rule13': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #            'region_24h_rule_key': 'rule8', '24h_rule_key': 'rule7', 'click_score_rate': 0.8},
+            # # 有回流人群
+            # 'rule11': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #            'region_24h_rule_key': 'rule7', '24h_rule_key': 'rule6', 'back_score_rate': 0.7},
+            # 'rule14': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #            'region_24h_rule_key': 'rule9', '24h_rule_key': 'rule8', 'back_score_rate': 0.8},
+            # # 20点地域小时级列表中增加7点-19点地域小时级的优质视频
+            # 'rule12': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #            'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3', 'add_videos_in_20h': True},
+
+            # 地域小时级列表中增加 前6小时 地域小时级的优质视频
+            'rule15': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
+                       'add_videos_with_pre_h': True, 'hour_count': 6},
+            # 地域小时级列表中增加 前2小时 地域小时级的优质视频,排序优化1:半小时级列表中有的视频以本小时的分数为准
+            'rule16': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
+                       'add_videos_with_pre_h': True, 'hour_count': 2, 'add_func': 'func2'},
+            # 地域小时级列表中增加 前47小时 地域小时级的优质视频
+            'rule17': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
+                       'add_videos_with_pre_h': True, 'hour_count': 47},
+            # 地域小时级列表中增加 前3小时 地域小时级的优质视频,排序优化1:半小时级列表中有的视频以本小时的分数为准
+            'rule18': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
+                       'add_videos_with_pre_h': True, 'hour_count': 3, 'add_func': 'func2'},
+
+        },
+        'data_params': DATA_PARAMS,
+        'params_list': [
+            {'data': 'data1', 'rule': 'rule4'},  # 095 vlog
+            {'data': 'data1', 'rule': 'rule4-1'},  # 095-1
+            {'data': 'data1', 'rule': 'rule4-2'},  # 262 特殊地域屏蔽危险视频
+            # {'data': 'data2', 'rule': 'rule4'},
+            {'data': 'data2', 'rule': 'rule7-1'},  # 121 内容精选
+            # {'data': 'data3', 'rule': 'rule7'},
+            # {'data': 'data4', 'rule': 'rule7'},
+            # {'data': 'data6', 'rule': 'rule7'},
+            {'data': 'data7', 'rule': 'rule8'},  # 票圈视频APP 10003.110156
+            # {'data': 'data1', 'rule': 'rule9'},
+            # {'data': 'data1', 'rule': 'rule10'},
+            # {'data': 'data1', 'rule': 'rule11'},
+            # {'data': 'data8', 'rule': 'rule7'},
+            # {'data': 'data9', 'rule': 'rule7'},
+            {'data': 'data10', 'rule': 'rule7'},  # 144 票圈视频
+            # {'data': 'data11', 'rule': 'rule7'},
+            # {'data': 'data12', 'rule': 'rule7'},
+            # {'data': 'data13', 'rule': 'rule7'},
+            # {'data': 'data1', 'rule': 'rule12'},
+            # {'data': 'data14', 'rule': 'rule7'},  # 159
+            # {'data': 'data1', 'rule': 'rule13'},  # 161
+            # {'data': 'data1', 'rule': 'rule14'},  # 162
+            # {'data': 'data1', 'rule': 'rule15'},  # 200 vlog
+            # {'data': 'data1', 'rule': 'rule16'},  # 214 vlog
+            # {'data': 'data1', 'rule': 'rule17'},  # 215 vlog
+            # {'data': 'data1', 'rule': 'rule18'},  # 224 vlog
+        ],
+    }
+
+class TestConfig(BaseConfig):
+    """测试环境配置"""
+    # 报警内容 环境区分
+    ENV_TEXT = "测试环境"
+    # 项目存放目录
+    PROJECT_PATH = '/data2/rov-offline'
+
+    # 测试环境redis地址
+    REDIS_INFO = {
+        'host': 'r-bp1ps6my7lzg8rdhwx682.redis.rds.aliyuncs.com',
+        'port': 6379,
+        'password': 'Wqsd@2019',
+    }
+
+    # Hologres连接参数,服务器使用
+    HOLOGRES_INFO = {
+        'host': 'hgprecn-cn-7pp28y18c00c-cn-hangzhou-vpc.hologres.aliyuncs.com',
+        'port': 80,
+        'dbname': 'dssm',
+        'user': 'LTAI5tMPqPy9yboQAf1mBCCN',
+        'password': '4BEcOgxREOPq7t3A7EWkjciVULeQGj'
+    }
+
+    # 测试环境mysql地址
+    MYSQL_INFO = {
+        'host': 'rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com',
+        'port': 3306,
+        'user': 'wx2016_longvideo',
+        'password': 'wx2016_longvideoP@assword1234',
+        'db': 'longvideo',
+        'charset': 'utf8'
+    }
+
+    # 测试环境 过滤用mysql地址
+    FILTER_MYSQL_INFO = {
+        'host': 'am-bp1g3ys9u00u483uc131930.ads.aliyuncs.com',
+        'port': 3306,
+        'user': 'lv_manager',
+        'password': 'lv_manager@2020',
+        'db': 'longvideo',
+        'charset': 'utf8'
+    }
+
+    # 日志服务配置
+    ALIYUN_LOG = {
+        'ENDPOINT': 'cn-hangzhou.log.aliyuncs.com',
+        'ACCESSID': 'LTAIWYUujJAm7CbH',
+        'ACCESSKEY': 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
+        'PROJECT': 'rov-server-test',
+    }
+
+    # Hologres视频状态存储表名
+    VIDEO_STATUS = 'longvideo_test.dwd_mdm_item_video_stat'
+
+    # 快速曝光流量池ID
+    QUICK_FLOW_POOL_ID = 3
+
+    # 获取流量池分发配置接口地址
+    GET_FLOW_POOL_RECOMMEND_CONFIG_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/getConfig'
+    # 从流量池获取视频接口地址
+    GET_VIDEOS_FROM_POOL_URL = 'http://testapi-internal.piaoquantv.com/flowpool/video/getAllVideo'
+    # 获取视频在流量池中的剩余可分发数接口地址
+    GET_REMAIN_VIEW_COUNT_URL = 'http://testapi-internal.piaoquantv.com/flowpool/video/remainViewCount'
+    # 计算完ROV通知后端接口地址
+    NOTIFY_BACKEND_UPDATE_ROV_SCORE_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/updateRovScore'
+    # 获取置顶视频列表接口地址
+    TOP_VIDEO_LIST_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/topVideoList'
+    # 获取首页兜底视频json接口地址
+    BOTTOM_JSON_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/video/distribute/structure/video/list'
+    # 通知后端更新兜底视频接口地址
+    NOTIFY_BACKEND_updateFallBackVideoList_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/updateFallBackVideoList'
+    # 获取限流视频接口地址
+    GET_VIDEO_LIMIT_LIST_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/getVideoLimitList'
+    # 获取管理后台设置的广告目标uv值接口地址
+    GET_AD_TARGET_UV_URL = 'https://testadmin.piaoquantv.com/manager/ad/algo/threshold/productUvTargetList'
+
+    # # logs 上传oss 目标Bucket指定目录
+    # OSS_FOLDER_LOGS = 'rov-offline/test/logs/'
+    # # data 上传oss 目标Bucket指定目录
+    # OSS_FOLDER_DATA = 'rov-offline/test/data/'
+
+def set_config():
+    # 获取环境变量 ROV_OFFLINE_ENV
+    env = os.environ.get('Base_ENV')
+    return TestConfig()
+
+

+ 307 - 0
db_help.py

@@ -0,0 +1,307 @@
+# coding:utf-8
+import redis
+from config import set_config
+config_  = set_config()
+conn_redis = None
+
+
+class RedisHelper(object):
+    def __init__(self):
+        """
+        初始化redis连接信息
+        redis_info: redis连接信息, 格式:dict, {'host': '', 'port': '', 'password': ''}
+        """
+        redis_info = config_.REDIS_INFO
+        self.host = redis_info['host']
+        self.port = redis_info['port']
+        self.password = redis_info['password']
+
+    def connect(self):
+        """
+        连接redis
+        :return: conn
+        """
+        global conn_redis
+        if conn_redis is None:
+            pool = redis.ConnectionPool(host=self.host,
+                                        port=self.port,
+                                        password=self.password,
+                                        decode_responses=True)
+            conn = redis.Redis(connection_pool=pool)
+            conn_redis = conn
+        return conn_redis
+
+    def key_exists(self, key_name):
+        """
+        判断key是否存在
+        :param key_name: key
+        :return: 存在-True, 不存在-False
+        """
+        conn = self.connect()
+        return conn.exists(key_name)
+
+    def del_keys(self, key_name):
+        """
+        删除key
+        :param key_name: key
+        :return: None
+        """
+        conn = self.connect()
+        conn.delete(key_name)
+
+    def get_data_from_redis(self, key_name):
+        """
+        读取redis中的数据
+        :param key_name: key
+        :return: data
+        """
+        conn = self.connect()
+        if not conn.exists(key_name):
+            # key不存在
+            return None
+        data = conn.get(key_name)
+        return data
+
+    def set_data_to_redis(self, key_name, value, expire_time=24*3600):
+        """
+        新增数据
+        :param key_name: key
+        :param value: 元素的值 videoId
+        :param expire_time: 过期时间,单位:s,默认1天
+        :return: None
+        """
+        conn = self.connect()
+        conn.set(key_name, value, ex=int(expire_time))
+
+    def add_data_with_zset(self, key_name, data, expire_time=7*24*3600):
+        """
+        新增数据,有序set
+        :param key_name: key
+        :param data: 元素的值及对应分数 type-dict  {value: score}
+        :param expire_time: 过期时间,单位:s,默认7天
+        :return: None
+        """
+        if not data:
+            return
+        conn = self.connect()
+        # 数据量大时一次性写入耗时长,分批次写入
+        keys_list = list(data.keys())
+        zadd_data = {}
+        for i, key in enumerate(keys_list):
+            if i % 100 == 0:
+                if zadd_data:
+                    conn.zadd(key_name, zadd_data)
+                zadd_data = {key: data.get(key)}
+            else:
+                zadd_data[key] = data.get(key)
+        if zadd_data:
+            conn.zadd(key_name, zadd_data)
+        # 设置过期时间
+        conn.expire(key_name, int(expire_time))
+
+    def get_data_zset_with_index(self, key_name, start, end, desc=True, with_scores=False):
+        """
+        根据索引位置获取元素的值
+        :param key_name: key
+        :param start: 索引起始点 闭区间,包含start
+        :param end: 索引结束点 闭区间,包含end
+        :param desc: 分数排序方式,默认从大到小
+        :param with_scores: 是否获取元素的分数,默认 False,只获取元素的值
+        :return: data 元素值列表(不包含分数),value(videoId)类型转换为int, 包含分数时不进行类型转换
+        """
+        conn = self.connect()
+        if not conn.exists(key_name):
+            return None
+        data = conn.zrange(key_name, start, end, desc, with_scores)
+        return data
+        # if with_scores:
+        #     return data
+        # else:
+        #     return [eval(value) for value in data]
+
+    def get_all_data_from_zset(self, key_name, desc=True, with_scores=False):
+        """
+        获取zset中所有元素的值
+        :param key_name: key
+        :param desc: 分数排序方式,默认从大到小
+        :param with_scores: 是否获取元素的分数,默认 False,只获取元素的值
+        :return: data 元素值列表(不包含分数),value(videoId)类型转换为int, 包含分数时不进行类型转换
+        """
+        conn = self.connect()
+        if not conn.exists(key_name):
+            return None
+        data = []
+        start = 0
+        step = 100
+        while True:
+            end = start + step - 1
+            temp = conn.zrange(key_name, start, end, desc, with_scores)
+            if not temp:
+                break
+            data.extend(temp)
+            start += step
+        return data
+
+    def get_score_with_value(self, key_name, value):
+        """
+        在zset中,根据元素的value获取对应的score
+        :param key_name: key
+        :param value: 元素的值
+        :return: score value对应的score
+        """
+        conn = self.connect()
+        return conn.zscore(key_name, value)
+
+    def update_score_with_value(self, key_name, value, score, expire_time=7*24*3600):
+        """
+        在zset中,修改元素value对应的score
+        :param key_name: key
+        :param value: 元素的值
+        :param score: value对应的score更新值
+        :param expire_time: 过期时间,单位:s,默认7天
+        """
+        conn = self.connect()
+        if conn.exists(key_name):
+            conn.zadd(key_name, {value: score})
+        else:
+            # key不存在时,需设置过期时间
+            conn.zadd(key_name, {value: score})
+            conn.expire(key_name, expire_time)
+
+    def remove_value_from_zset(self, key_name, value):
+        """
+        删除zset中的指定元素
+        :param key_name: key
+        :param value: 元素的值
+        :return: None
+        """
+        conn = self.connect()
+        conn.zrem(key_name, *value)
+
+    def remove_by_rank_from_zset(self, key_name, start, stop):
+        """
+        移除有序集中,指定排名(rank)区间内的所有成员
+        :param key_name: key
+        :param start: 开始位
+        :param stop: 结束位
+        :return: None
+        """
+        conn = self.connect()
+        conn.zremrangebyrank(name=key_name, min=start, max=stop)
+
+    def get_index_with_data(self, key_name, value):
+        """
+        根据元素的值获取在有序set中的位置,按照分数倒序(从大到小)
+        :param key_name: key
+        :param value: 元素的值
+        :return: idx 位置索引
+        """
+        conn = self.connect()
+        return conn.zrevrank(key_name, value)
+
+    def get_data_from_set(self, key_name):
+        """
+        获取set中的所有数据
+        :param key_name: key
+        :return: data
+        """
+        conn = self.connect()
+        if not conn.exists(key_name):
+            # key不存在
+            return None
+        data = []
+        cursor = 0
+        while True:
+            cur, temp = conn.sscan(key_name, cursor=cursor, count=2000)
+            data.extend(temp)
+            if cur == 0:
+                break
+            cursor = cur
+        return list(set(data))
+
+    def add_data_with_set(self, key_name, values, expire_time=30*60):
+        """
+        新增数据,set
+        :param key_name: key
+        :param values: 要添加的元素  类型-set
+        :param expire_time: 过期时间,单位:s,默认0.5小时
+        :return: None
+        """
+        conn = self.connect()
+        conn.sadd(key_name, *values)
+        # 设置过期时间
+        conn.expire(key_name, expire_time)
+
+    def data_exists_with_set(self, key_name, value):
+        """
+        判断元素value是否在集合key_name中
+        :param key_name: key
+        :param value: 需判断的元素
+        :return: 存在-True, 不存在-False
+        """
+        conn = self.connect()
+        return conn.sismember(key_name, value)
+
+    def remove_value_from_set(self, key_name, values):
+        """
+        删除set中的指定元素
+        :param key_name: key
+        :param values: 元素的值, 类型-set
+        :return: None
+        """
+        conn = self.connect()
+        conn.srem(key_name, *values)
+
+    def persist_key(self, key_name):
+        """
+        移除key的过期时间,将其转换为永久状态
+        :param key_name: key
+        :return:
+        """
+        conn = self.connect()
+        conn.persist(key_name)
+
+    def setnx_key(self, key_name, value, expire_time=5*60):
+        """
+        当key不存在时,将value塞入key中,key存在时不做操作
+        :param key_name: key
+        :param value: value
+        :return: 过期时间,单位:s,默认5分钟 type-int
+        """
+        conn = self.connect()
+        conn.setnx(name=key_name, value=value)
+        conn.expire(name=key_name, time=int(expire_time))
+
+    def update_expire_time(self, key_name, expire_time):
+        """
+        修改过期时间
+        :param key_name: key
+        :param expire_time: 过期时间
+        :return:
+        """
+        conn = self.connect()
+        conn.expire(name=key_name, time=int(expire_time))
+
+    def update_batch_setnx_key(self, data, expire_time=5*60):
+        conn = self.connect()
+        for key_name, v in data.items():
+            conn.set(name=key_name, value=v)
+            conn.expire(name=key_name, time=int(expire_time))
+        #self.disconnet()
+ 
+    #def disconnet(self):
+    #    conn_redis.disconnect()        
+
+if __name__ == '__main__':
+    redis_helper = RedisHelper()
+    # key = 'com.weiqu.video.hot.recommend.item.score.20210901'
+    # res = redis_helper.get_score_with_value(key, 90797)
+    # print(res)
+    # redis_helper.remove_value_from_set(key_name=config_.RELEVANT_TOP_VIDEOS_KEY_NAME, values=(8633849,))
+    con = redis_helper.connect()
+    res = redis_helper.key_exists(key_name='eeew')
+    print(res)
+    redis_helper.setnx_key('eeew','1')
+    res = redis_helper.key_exists(key_name='eeew')
+    print(res)
+

+ 54 - 0
extract_share_log.py

@@ -0,0 +1,54 @@
+#coding utf-8
+from odps import ODPS
+from config import set_config
+import datetime
+import pandas as pd
+from collections import defaultdict
+import sys
+
+config_ = set_config()
+
+odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project="loghubods",
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'])
+
+
+def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    """
+    从odps获取数据
+    :param date: 日期 type-string '%Y%m%d'
+    :param project: type-string
+    :param table: 表名 type-string
+    :param connect_timeout: 连接超时设置
+    :param read_timeout: 读取超时设置
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: records
+    """
+    records = odps.read_table(name=table, partition='dt=%s' % date)
+    return records
+
+def exe_sql(sql):    
+    data = []
+    with odps.execute_sql(sql).open_reader() as reader:
+        d = defaultdict(list)  # collection默认一个dict
+        for record in reader:
+            for res in record:
+                d[res[0]].append(res[1])  # 解析record中的每一个元组,存储方式为(k,v),以k作为key,存储每一列的内容;
+        data = pd.DataFrame.from_dict(d, orient='index').T  # 转换为数据框,并转置,不转置的话是横条数据
+    return data
+
+
+if __name__=="__main__":
+    project = 'loghubods'
+    last7day=sys.argv[1]
+    now_date=sys.argv[2]
+    print("now date:", now_date)
+    table = 'user_share_log'
+    sql = "select machinecode, shareobjectid from loghubods.user_share_log where dt between '"+last7day+"' and '"+now_date+"' and topic='share';"
+    print(sql)
+    data = exe_sql(sql)
+    data.to_csv("./data/user_item_share_"+now_date, sep='\t') 

+ 25 - 0
import_redist.py

@@ -0,0 +1,25 @@
+#coding utf-8
+import sys
+import json
+from db_help import RedisHelper
+
+if __name__=="__main__":
+     with  open(sys.argv[1]) as f:
+         rec_json_list=json.load(f)
+         #print(rec_json_list)
+         import_data_dict = {}
+         for line in rec_json_list:
+             #print(line)
+             rec_list = line[1][:10]
+             rec_item_list = []
+             for rec_item in rec_list:
+                 rec_item_list.append((rec_item[0], round(rec_item[1],3)))
+             print(line[0]+"\t"+json.dumps(rec_item_list))
+             key="sim_"+line[0]
+             import_data_dict[key] = json.dumps(rec_item_list)
+         redis_helper = RedisHelper()
+         #redis_helper.update_batch_setnx_key(import_data_dict, 60*60*12)
+         #con = redis_helper.connect()
+         res = redis_helper.get_data_from_redis("sim_14330133")
+         print(res)
+   

+ 19 - 0
run.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+
+conda activate python36 
+
+#1. download data
+nowday=`date  +"%Y%m%d" -d -0days`
+last7day=`date  +"%Y%m%d" -d -8days`
+echo ${nowday} 
+echo ${last7day}
+#python extract_share_log.py ${last7day} ${nowday}
+
+#nowday='20230505'
+#2. cal i2i result
+#python calI2I.py ${nowday} 
+
+#3.import res
+python import_redist.py "./data/rec_result_"${nowday}".json"