# -*- coding: utf-8 -*-
"""Build the 7-day "hot" video recall list.

Usage: pass the date suffix of the data files as the first command-line
argument.  The script reads ``./data/7_days_video_data_<date>``, sums the
per-video counters, scores every video, and publishes the best-scoring
ones under the ``hot_7day:`` key through ``RedisHelper`` as well as to
``./data/7_days_recall_hot_<date>``.
"""
import sys
from operator import itemgetter
import json

import pandas as pd

# Key under which the recall list is stored, and its lifetime in seconds.
RECALL_NAME = "hot_7day:"
RECALL_TTL_SECONDS = 60 * 60 * 24 * 15

# Zero-based columns of the tab-separated input that the script reads.
VID_COLUMN = 1
FIRST_COUNTER_COLUMN = 4
MIN_COLUMNS = 11


def aggregate_video_stats(lines):
    """Sum the per-row counters of every video id.

    Args:
        lines: iterable of tab-separated text lines.  The first line is
            skipped (treated as a header) and lines with fewer than 11
            fields are ignored.

    Returns:
        dict mapping the integer video id (column 1) to a 7-tuple
        ``(view_users, view_pv, play_users, play_pv, share_users,
        share_pv, return_users)`` holding the column-wise sums of
        columns 4-10 over all rows of that video.

    Raises:
        ValueError: if one of the columns read is not an integer.
    """
    totals = {}
    for row_number, line in enumerate(lines):
        if row_number == 0:
            continue  # header line
        fields = line.strip().split("\t")
        if len(fields) < MIN_COLUMNS:
            continue
        vid = int(fields[VID_COLUMN])
        counters = tuple(
            int(value) for value in fields[FIRST_COUNTER_COLUMN:MIN_COLUMNS]
        )
        seen = totals.get(vid)
        if seen is None:
            totals[vid] = counters
        else:
            # Add position by position so every counter is summed with its
            # own running total and none of them can be mixed up.
            totals[vid] = tuple(old + new for old, new in zip(seen, counters))
    return totals


def score_videos(totals):
    """Compute the ranking scores of every video that passes the filters.

    Videos with ``view_pv < 100``, ``view_users < 10`` or a play/view
    ratio of at most 0.5 are dropped.

    Args:
        totals: mapping as returned by :func:`aggregate_video_stats`.

    Returns:
        dict mapping video id to the list ``[k_score,
        share_score * backrate, share_score, backrate, ctr_score,
        view_users, view_pv, play_users, play_pv, share_users, share_pv,
        return_users]``.
    """
    scored = {}
    for vid, counters in totals.items():
        (view_users, view_pv, play_users, play_pv,
         share_users, share_pv, return_users) = counters
        if view_pv < 100 or view_users < 10:
            continue
        # The "+ 5" in each denominator damps the ratios of videos with
        # few views (and rules out a division by zero).
        backrate = float(return_users) / (float(view_users) + 5)
        k_score = backrate  # same formula as backrate; both slots are kept
        share_score = float(share_pv) / (float(view_pv) + 5)
        ctr_score = float(play_pv) / float(view_pv + 5)
        if ctr_score <= 0.5:
            continue
        scored[vid] = [
            k_score, share_score * backrate, share_score, backrate, ctr_score,
            view_users, view_pv, play_users, play_pv,
            share_users, share_pv, return_users,
        ]
    return scored


def rank_hot_videos(scored, limit=100):
    """Return the ``limit`` best ``(vid, score_info)`` pairs.

    Pairs are ordered by ``score_info[1]`` (share_score * backrate),
    highest first; ties keep the iteration order of ``scored``.
    """
    ranked = sorted(scored.items(), key=lambda entry: entry[1][1], reverse=True)
    return ranked[:limit]


def main(nowdate):
    """Run the whole pipeline for the data files suffixed with ``nowdate``."""
    # Imported here rather than at module level so that the pure helpers
    # above can be imported without the project's Redis helper.
    from db_help import RedisHelper

    with open("./data/7_days_video_data_" + nowdate) as source:
        totals = aggregate_video_stats(source)

    redis_helper = RedisHelper()
    top_videos = rank_hot_videos(score_videos(totals))
    print("sorted_v:", top_videos)

    hot_recall_res = [(vid, info[1]) for vid, info in top_videos]
    # The output file is created (or truncated) even when nothing is
    # published, exactly as before.
    with open("./data/7_days_recall_hot_" + nowdate, "w") as sink:
        # Publish only when more than 10 videos qualified.
        if len(hot_recall_res) > 10:
            score_info = json.dumps(hot_recall_res)
            print("score_info:", score_info)
            redis_helper.set_data_to_redis(
                RECALL_NAME, score_info, RECALL_TTL_SECONDS
            )
            sink.write(RECALL_NAME + "\t" + score_info + "\n")


if __name__ == "__main__":
    main(sys.argv[1])