# coding: utf-8
- import sys
- from operator import itemgetter
- import json
- import pandas as pd
- from db_help import RedisHelper
# Layout of one tab-separated input row: the video id sits in column 1 and
# the seven counters occupy columns 4..10, in the order listed below.
MIN_FIELDS = 11
VID_COLUMN = 1
COUNTER_COLUMNS = slice(4, 11)
# (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)

# Filtering thresholds and smoothing term used when scoring a video.
MIN_VIEW_PV = 100
MIN_VIEW_USERS = 10
MIN_CTR = 0.5
SMOOTHING = 5

# Publishing parameters.
TOP_N = 100
MIN_RESULTS = 10
RECALL_NAME = "hot_7day:"
# 15 days expressed in seconds; passed as the third argument of
# set_data_to_redis -- presumably the key expiry, confirm against RedisHelper.
REDIS_TTL = 60 * 60 * 24 * 15


def aggregate_video_stats(lines):
    """Sum the seven counters of every video id found in *lines*.

    Args:
        lines: iterable of tab-separated text rows. The first row is always
            skipped (treated as a header). Rows that have fewer than 11
            fields after stripping surrounding whitespace are ignored.

    Returns:
        dict mapping the integer video id to a 7-tuple
        ``(view_users, view_pv, play_users, play_pv, share_users, share_pv,
        return_users)`` holding the totals over all rows of that id.

    Raises:
        ValueError: if the id or a counter field of a kept row is not an
            integer literal.
    """
    totals = {}
    rows = iter(lines)
    next(rows, None)  # drop the header row
    for line in rows:
        items = line.strip().split("\t")
        if len(items) < MIN_FIELDS:
            continue
        vid = int(items[VID_COLUMN])
        counters = tuple(int(field) for field in items[COUNTER_COLUMNS])
        previous = totals.get(vid)
        if previous is None:
            totals[vid] = counters
        else:
            # Add position by position so each counter is merged with its own
            # running total (the earlier hand-written merge mixed up slots).
            totals[vid] = tuple(old + new for old, new in zip(previous, counters))
    return totals


def score_videos(stats_by_vid):
    """Compute the score record of every video that passes the filters.

    A video is dropped when ``view_pv < 100`` or ``view_users < 10``, or when
    its smoothed click-through rate ``play_pv / (view_pv + 5)`` is not above
    0.5.

    Args:
        stats_by_vid: mapping of video id to the 7-tuple produced by
            ``aggregate_video_stats``.

    Returns:
        dict mapping video id to the list
        ``[k_score, share_score * backrate, share_score, backrate, ctr_score,
        view_users, view_pv, play_users, play_pv, share_users, share_pv,
        return_users]``.
    """
    scored = {}
    for vid, counters in stats_by_vid.items():
        (view_users, view_pv, play_users, play_pv,
         share_users, share_pv, return_users) = counters
        if view_pv < MIN_VIEW_PV or view_users < MIN_VIEW_USERS:
            continue
        ctr_score = float(play_pv) / float(view_pv + SMOOTHING)
        if ctr_score <= MIN_CTR:
            continue
        backrate = float(return_users) / (float(view_users) + SMOOTHING)
        share_score = float(share_pv) / (float(view_pv) + SMOOTHING)
        # k_score is computed with the same formula as backrate; both slots
        # are kept so the record layout stays unchanged.
        k_score = backrate
        scored[vid] = [
            k_score, share_score * backrate, share_score, backrate, ctr_score,
            view_users, view_pv, play_users, play_pv,
            share_users, share_pv, return_users,
        ]
    return scored


def rank_hot_videos(scored, limit=TOP_N):
    """Return up to *limit* ``(vid, score_record)`` pairs, best first.

    Pairs are ordered by the second entry of the score record
    (``share_score * backrate``) in descending order.
    """
    ranked = sorted(scored.items(), key=lambda pair: pair[1][1], reverse=True)
    return ranked[:limit]


def main(argv):
    """Build the 7-day hot recall list for the date given in ``argv[1]``.

    Reads ``./data/7_days_video_data_<date>``, ranks the videos, and -- only
    when more than 10 videos are ranked -- stores the JSON-encoded list in
    Redis under ``hot_7day:`` and writes it to
    ``./data/7_days_recall_hot_<date>``. The output file is created (or
    truncated) even when nothing is written to it.
    """
    nowdate = argv[1]
    input_path = "./data/7_days_video_data_" + nowdate
    output_path = "./data/7_days_recall_hot_" + nowdate

    # 1. load data
    with open(input_path) as source:
        stats_by_vid = aggregate_video_stats(source)

    redis_helper = RedisHelper()

    # 2. score, rank and publish
    sorted_v = rank_hot_videos(score_videos(stats_by_vid))
    with open(output_path, 'w') as sink:
        print("sorted_v:", sorted_v)
        hot_recall_res = [(vid, info[1]) for vid, info in sorted_v]
        if len(hot_recall_res) > MIN_RESULTS:
            score_info = json.dumps(hot_recall_res)
            print("score_info:", score_info)
            redis_helper.set_data_to_redis(RECALL_NAME, score_info, REDIS_TTL)
            sink.write(RECALL_NAME + "\t" + score_info + "\n")


if __name__ == "__main__":
    main(sys.argv)
-
|